Showing
1 changed file
with
28 additions
and
38 deletions
... | @@ -8,8 +8,6 @@ from Bio.PDB.mmcifio import MMCIFIO | ... | @@ -8,8 +8,6 @@ from Bio.PDB.mmcifio import MMCIFIO |
8 | from Bio.PDB.MMCIF2Dict import MMCIF2Dict | 8 | from Bio.PDB.MMCIF2Dict import MMCIF2Dict |
9 | from Bio.PDB.PDBExceptions import PDBConstructionWarning, BiopythonWarning | 9 | from Bio.PDB.PDBExceptions import PDBConstructionWarning, BiopythonWarning |
10 | from Bio.PDB.Dice import ChainSelector | 10 | from Bio.PDB.Dice import ChainSelector |
11 | -from Bio._py3k import urlretrieve as _urlretrieve | ||
12 | -from Bio._py3k import urlcleanup as _urlcleanup | ||
13 | from Bio.Alphabet import generic_rna | 11 | from Bio.Alphabet import generic_rna |
14 | from Bio.Seq import Seq | 12 | from Bio.Seq import Seq |
15 | from Bio.SeqRecord import SeqRecord | 13 | from Bio.SeqRecord import SeqRecord |
... | @@ -22,6 +20,21 @@ from time import sleep | ... | @@ -22,6 +20,21 @@ from time import sleep |
22 | from tqdm import tqdm | 20 | from tqdm import tqdm |
23 | from tqdm.contrib.concurrent import process_map | 21 | from tqdm.contrib.concurrent import process_map |
24 | 22 | ||
23 | +def trace_unhandled_exceptions(func): | ||
24 | + @wraps(func) | ||
25 | + def wrapped_func(*args, **kwargs): | ||
26 | + try: | ||
27 | + return func(*args, **kwargs) | ||
28 | + except: | ||
29 | + s = traceback.format_exc() | ||
30 | + with open(runDir + "/errors.txt", "a") as f: | ||
31 | + f.write("Exception in "+func.__name__+"\n") | ||
32 | + f.write(s) | ||
33 | + f.write("\n\n") | ||
34 | + | ||
35 | + warn('Exception in '+func.__name__, error=True) | ||
36 | + print(s) | ||
37 | + return wrapped_func | ||
25 | 38 | ||
26 | pd.set_option('display.max_rows', None) | 39 | pd.set_option('display.max_rows', None) |
27 | sqlite3.enable_callback_tracebacks(True) | 40 | sqlite3.enable_callback_tracebacks(True) |
... | @@ -123,7 +136,7 @@ class Chain: | ... | @@ -123,7 +136,7 @@ class Chain: |
123 | 136 | ||
124 | Chains accumulate information through this script, and are saved to files at the end of major steps.""" | 137 | Chains accumulate information through this script, and are saved to files at the end of major steps.""" |
125 | 138 | ||
126 | - def __init__(self, pdb_id, pdb_model, pdb_chain_id, chain_label, rfam="", inferred=False, pdb_start=None, pdb_end=None): | 139 | + def __init__(self, pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class, rfam="", inferred=False, pdb_start=None, pdb_end=None): |
127 | self.pdb_id = pdb_id # PDB ID | 140 | self.pdb_id = pdb_id # PDB ID |
128 | self.pdb_model = int(pdb_model) # model ID, starting at 1 | 141 | self.pdb_model = int(pdb_model) # model ID, starting at 1 |
129 | self.pdb_chain_id = pdb_chain_id # chain ID (mmCIF), multiple letters | 142 | self.pdb_chain_id = pdb_chain_id # chain ID (mmCIF), multiple letters |
... | @@ -193,6 +206,7 @@ class Chain: | ... | @@ -193,6 +206,7 @@ class Chain: |
193 | 206 | ||
194 | notify(status) | 207 | notify(status) |
195 | 208 | ||
209 | + @trace_unhandled_exceptions | ||
196 | def extract_3D_data(self): | 210 | def extract_3D_data(self): |
197 | """ Maps DSSR annotations to the chain. """ | 211 | """ Maps DSSR annotations to the chain. """ |
198 | 212 | ||
... | @@ -749,8 +763,7 @@ class Downloader: | ... | @@ -749,8 +763,7 @@ class Downloader: |
749 | print(f"\t> Download Rfam.cm.gz from Rfam..." + " " * 37, end='', flush=True) | 763 | print(f"\t> Download Rfam.cm.gz from Rfam..." + " " * 37, end='', flush=True) |
750 | if not path.isfile(path_to_seq_data + "Rfam.cm"): | 764 | if not path.isfile(path_to_seq_data + "Rfam.cm"): |
751 | try: | 765 | try: |
752 | - _urlcleanup() | 766 | + subprocess.run(["wget", "ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/Rfam.cm.gz", "-O", path_to_seq_data + "Rfam.cm.gz"]) |
753 | - _urlretrieve(f'ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/Rfam.cm.gz', path_to_seq_data + "Rfam.cm.gz") | ||
754 | print(f"\t{validsymb}", flush=True) | 767 | print(f"\t{validsymb}", flush=True) |
755 | print(f"\t\t> Uncompressing Rfam.cm...", end='', flush=True) | 768 | print(f"\t\t> Uncompressing Rfam.cm...", end='', flush=True) |
756 | subprocess.run(["gunzip", path_to_seq_data + "Rfam.cm.gz"], stdout=subprocess.DEVNULL) | 769 | subprocess.run(["gunzip", path_to_seq_data + "Rfam.cm.gz"], stdout=subprocess.DEVNULL) |
... | @@ -813,16 +826,14 @@ class Downloader: | ... | @@ -813,16 +826,14 @@ class Downloader: |
813 | if not path.isfile(path_to_seq_data + f"rfam_sequences/fasta/{rfam_acc}.fa.gz"): | 826 | if not path.isfile(path_to_seq_data + f"rfam_sequences/fasta/{rfam_acc}.fa.gz"): |
814 | for _ in range(10): # retry up to 10 times if it fails | 827 | for _ in range(10): # retry up to 10 times if it fails |
815 | try: | 828 | try: |
816 | - _urlcleanup() | 829 | + subprocess.run(["wget", f'ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/fasta_files/{rfam_acc}.fa.gz', "-O", path_to_seq_data + f"rfam_sequences/fasta/{rfam_acc}.fa.gz"], stdout=subprocess.DEVNULL) |
817 | - _urlretrieve( f'ftp://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/fasta_files/{rfam_acc}.fa.gz', | ||
818 | - path_to_seq_data + f"rfam_sequences/fasta/{rfam_acc}.fa.gz") | ||
819 | notify(f"Downloaded {rfam_acc}.fa.gz from Rfam") | 830 | notify(f"Downloaded {rfam_acc}.fa.gz from Rfam") |
820 | return # if it worked, no need to retry | 831 | return # if it worked, no need to retry |
821 | except Exception as e: | 832 | except Exception as e: |
822 | warn(f"Error downloading {rfam_acc}.fa.gz: {e}") | 833 | warn(f"Error downloading {rfam_acc}.fa.gz: {e}") |
823 | warn("retrying in 0.2s (worker " + str(os.getpid()) + f', try {_+1}/100)') | 834 | warn("retrying in 0.2s (worker " + str(os.getpid()) + f', try {_+1}/100)') |
824 | time.sleep(0.2) | 835 | time.sleep(0.2) |
825 | - warn("Tried to reach database 100 times and failed. Aborting.", error=True) | 836 | + warn("Tried to reach Rfam FTP 100 times and failed. Aborting.", error=True) |
826 | else: | 837 | else: |
827 | notify(f"Downloaded {rfam_acc}.fa.gz from Rfam", "already there") | 838 | notify(f"Downloaded {rfam_acc}.fa.gz from Rfam", "already there") |
828 | 839 | ||
... | @@ -860,14 +871,11 @@ class Downloader: | ... | @@ -860,14 +871,11 @@ class Downloader: |
860 | def download_from_SILVA(self, unit): | 871 | def download_from_SILVA(self, unit): |
861 | if not path.isfile(path_to_seq_data + f"realigned/{unit}.arb"): | 872 | if not path.isfile(path_to_seq_data + f"realigned/{unit}.arb"): |
862 | try: | 873 | try: |
863 | - _urlcleanup() | ||
864 | print(f"Downloading {unit} from SILVA...", end='', flush=True) | 874 | print(f"Downloading {unit} from SILVA...", end='', flush=True) |
865 | if unit=="LSU": | 875 | if unit=="LSU": |
866 | - _urlretrieve('http://www.arb-silva.de/fileadmin/arb_web_db/release_132/ARB_files/SILVA_132_LSURef_07_12_17_opt.arb.gz', | 876 | + subprocess.run(["wget", "http://www.arb-silva.de/fileadmin/arb_web_db/release_132/ARB_files/SILVA_132_LSURef_07_12_17_opt.arb.gz", "-O", path_to_seq_data + "realigned/LSU.arb.gz"]) |
867 | - path_to_seq_data + "realigned/LSU.arb.gz") | ||
868 | else: | 877 | else: |
869 | - _urlretrieve('http://www.arb-silva.de/fileadmin/silva_databases/release_138/ARB_files/SILVA_138_SSURef_05_01_20_opt.arb.gz', | 878 | + subprocess.run(["wget", "http://www.arb-silva.de/fileadmin/silva_databases/release_138/ARB_files/SILVA_138_SSURef_05_01_20_opt.arb.gz", "-O", path_to_seq_data + "realigned/SSU.arb.gz"]) |
870 | - path_to_seq_data + "realigned/SSU.arb.gz") | ||
871 | except: | 879 | except: |
872 | warn(f"Error downloading the {unit} database from SILVA", error=True) | 880 | warn(f"Error downloading the {unit} database from SILVA", error=True) |
873 | exit(1) | 881 | exit(1) |
... | @@ -986,7 +994,7 @@ class Pipeline: | ... | @@ -986,7 +994,7 @@ class Pipeline: |
986 | 994 | ||
987 | for opt, arg in opts: | 995 | for opt, arg in opts: |
988 | 996 | ||
989 | - if opt in ["--from-scratch", "--update-mmcifs", "--update-homologous"] and "tobedefinedbyoptions" in [path_to_3D_data, path_to_seq_data]: | 997 | + if opt in ["--from-scratch", "--update-homologous"] and "tobedefinedbyoptions" in [path_to_3D_data, path_to_seq_data]: |
990 | print("Please provide --3d-folder and --seq-folder first, so that we know what to delete and update.") | 998 | print("Please provide --3d-folder and --seq-folder first, so that we know what to delete and update.") |
991 | exit() | 999 | exit() |
992 | 1000 | ||
... | @@ -1083,7 +1091,7 @@ class Pipeline: | ... | @@ -1083,7 +1091,7 @@ class Pipeline: |
1083 | elif opt == "--archive": | 1091 | elif opt == "--archive": |
1084 | self.ARCHIVE = True | 1092 | self.ARCHIVE = True |
1085 | 1093 | ||
1086 | - if "tobedefinedbyoptions" in [path_to_3D_data, path_to_seq_data]: | 1094 | + if self.HOMOLOGY and "tobedefinedbyoptions" in [path_to_3D_data, path_to_seq_data] or path_to_3D_data == "tobedefinedbyoptions": |
1087 | print("usage: RNANet.py --3d-folder path/where/to/store/chains --seq-folder path/where/to/store/alignments") | 1095 | print("usage: RNANet.py --3d-folder path/where/to/store/chains --seq-folder path/where/to/store/alignments") |
1088 | print("See RNANet.py --help for more information.") | 1096 | print("See RNANet.py --help for more information.") |
1089 | exit(1) | 1097 | exit(1) |
... | @@ -1144,7 +1152,7 @@ class Pipeline: | ... | @@ -1144,7 +1152,7 @@ class Pipeline: |
1144 | chain_label = f"{pdb_id}_{str(pdb_model)}_{pdb_chain_id}" | 1152 | chain_label = f"{pdb_id}_{str(pdb_model)}_{pdb_chain_id}" |
1145 | res = sql_ask_database(conn, f"""SELECT chain_id from chain WHERE structure_id='{pdb_id}' AND chain_name='{pdb_chain_id}' AND rfam_acc IS NULL AND issue=0""") | 1153 | res = sql_ask_database(conn, f"""SELECT chain_id from chain WHERE structure_id='{pdb_id}' AND chain_name='{pdb_chain_id}' AND rfam_acc IS NULL AND issue=0""") |
1146 | if not len(res): # the chain is NOT yet in the database, or this is a known issue | 1154 | if not len(res): # the chain is NOT yet in the database, or this is a known issue |
1147 | - self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label)) | 1155 | + self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class)) |
1148 | conn.close() | 1156 | conn.close() |
1149 | 1157 | ||
1150 | if self.SELECT_ONLY is not None: | 1158 | if self.SELECT_ONLY is not None: |
... | @@ -1476,22 +1484,6 @@ def init_worker(tqdm_lock=None): | ... | @@ -1476,22 +1484,6 @@ def init_worker(tqdm_lock=None): |
1476 | if tqdm_lock is not None: | 1484 | if tqdm_lock is not None: |
1477 | tqdm.set_lock(tqdm_lock) | 1485 | tqdm.set_lock(tqdm_lock) |
1478 | 1486 | ||
1479 | -def trace_unhandled_exceptions(func): | ||
1480 | - @wraps(func) | ||
1481 | - def wrapped_func(*args, **kwargs): | ||
1482 | - try: | ||
1483 | - return func(*args, **kwargs) | ||
1484 | - except: | ||
1485 | - s = traceback.format_exc() | ||
1486 | - with open(runDir + "/errors.txt", "a") as f: | ||
1487 | - f.write("Exception in "+func.__name__+"\n") | ||
1488 | - f.write(s) | ||
1489 | - f.write("\n\n") | ||
1490 | - | ||
1491 | - warn('Exception in '+func.__name__, error=True) | ||
1492 | - print(s) | ||
1493 | - return wrapped_func | ||
1494 | - | ||
1495 | def warn(message, error=False): | 1487 | def warn(message, error=False): |
1496 | """Pretty-print warnings and error messages. | 1488 | """Pretty-print warnings and error messages. |
1497 | """ | 1489 | """ |
... | @@ -1894,9 +1886,9 @@ def work_infer_mappings(update_only, allmappings, codelist): | ... | @@ -1894,9 +1886,9 @@ def work_infer_mappings(update_only, allmappings, codelist): |
1894 | with sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0) as conn: | 1886 | with sqlite3.connect(runDir+"/results/RNANet.db", timeout=10.0) as conn: |
1895 | res = sql_ask_database(conn, f"""SELECT chain_id from chain WHERE structure_id='{pdb_id}' AND chain_name='{pdb_chain_id}' AND rfam_acc='{rfam}' AND issue=0""") | 1887 | res = sql_ask_database(conn, f"""SELECT chain_id from chain WHERE structure_id='{pdb_id}' AND chain_name='{pdb_chain_id}' AND rfam_acc='{rfam}' AND issue=0""") |
1896 | if not len(res): # the chain is NOT yet in the database, or this is a known issue | 1888 | if not len(res): # the chain is NOT yet in the database, or this is a known issue |
1897 | - newchains.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, rfam=rfam, inferred=inferred, pdb_start=pdb_start, pdb_end=pdb_end)) | 1889 | + newchains.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class, rfam=rfam, inferred=inferred, pdb_start=pdb_start, pdb_end=pdb_end)) |
1898 | else: | 1890 | else: |
1899 | - newchains.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, rfam=rfam, inferred=inferred, pdb_start=pdb_start, pdb_end=pdb_end)) | 1891 | + newchains.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class, rfam=rfam, inferred=inferred, pdb_start=pdb_start, pdb_end=pdb_end)) |
1900 | 1892 | ||
1901 | return newchains | 1893 | return newchains |
1902 | 1894 | ||
... | @@ -1907,14 +1899,12 @@ def work_mmcif(pdb_id): | ... | @@ -1907,14 +1899,12 @@ def work_mmcif(pdb_id): |
1907 | SETS table structure | 1899 | SETS table structure |
1908 | """ | 1900 | """ |
1909 | 1901 | ||
1910 | - url = 'http://files.rcsb.org/download/%s.cif' % (pdb_id) | ||
1911 | final_filepath = path_to_3D_data+"RNAcifs/"+pdb_id+".cif" | 1902 | final_filepath = path_to_3D_data+"RNAcifs/"+pdb_id+".cif" |
1912 | 1903 | ||
1913 | # Attempt to download it if not present | 1904 | # Attempt to download it if not present |
1914 | try: | 1905 | try: |
1915 | if not path.isfile(final_filepath): | 1906 | if not path.isfile(final_filepath): |
1916 | - _urlcleanup() | 1907 | + subprocess.run(["wget", f'http://files.rcsb.org/download/{pdb_id}.cif', "-O", final_filepath], stdout=subprocess.DEVNULL) |
1917 | - _urlretrieve(url, final_filepath) | ||
1918 | except: | 1908 | except: |
1919 | warn(f"Unable to download {pdb_id}.cif. Ignoring it.", error=True) | 1909 | warn(f"Unable to download {pdb_id}.cif. Ignoring it.", error=True) |
1920 | return | 1910 | return | ... | ... |
-
Please register or login to post a comment