Showing
1 changed file
with
19 additions
and
7 deletions
... | @@ -845,14 +845,14 @@ class Downloader: | ... | @@ -845,14 +845,14 @@ class Downloader: |
845 | if os.path.isfile(path_to_3D_data + f"latest_nr_list_{nr_code}A.csv"): | 845 | if os.path.isfile(path_to_3D_data + f"latest_nr_list_{nr_code}A.csv"): |
846 | print("\t> Use of the previous version.\t", end="", flush=True) | 846 | print("\t> Use of the previous version.\t", end="", flush=True) |
847 | else: | 847 | else: |
848 | - return pd.DataFrame([], columns=["class", "class_members"]) | 848 | + return pd.DataFrame([], columns=["class","representative","class_members"]) |
849 | 849 | ||
850 | nrlist = pd.read_csv(path_to_3D_data + f"latest_nr_list_{nr_code}A.csv") | 850 | nrlist = pd.read_csv(path_to_3D_data + f"latest_nr_list_{nr_code}A.csv") |
851 | - full_structures_list = [ tuple(i[1]) for i in nrlist[['class', 'class_members']].iterrows() ] | 851 | + full_structures_list = [ tuple(i[1]) for i in nrlist[["class","representative","class_members"]].iterrows() ] |
852 | print(f"\t{validsymb}", flush=True) | 852 | print(f"\t{validsymb}", flush=True) |
853 | 853 | ||
854 | # The beginning of an adventure. | 854 | # The beginning of an adventure. |
855 | - return full_structures_list # list of ( str (class), str (class_members) ) | 855 | + return full_structures_list # list of ( str (class), str (representative), str (class_members) ) |
856 | 856 | ||
857 | def download_from_SILVA(self, unit): | 857 | def download_from_SILVA(self, unit): |
858 | 858 | ||
... | @@ -966,6 +966,7 @@ class Pipeline: | ... | @@ -966,6 +966,7 @@ class Pipeline: |
966 | self.RUN_STATS = False | 966 | self.RUN_STATS = False |
967 | self.EXTRACT_CHAINS = False | 967 | self.EXTRACT_CHAINS = False |
968 | self.REUSE_ALL = False | 968 | self.REUSE_ALL = False |
969 | + self.REDUNDANT = False | ||
969 | self.SELECT_ONLY = None | 970 | self.SELECT_ONLY = None |
970 | self.ARCHIVE = False | 971 | self.ARCHIVE = False |
971 | self.SAVELOGS = True | 972 | self.SAVELOGS = True |
... | @@ -982,7 +983,7 @@ class Pipeline: | ... | @@ -982,7 +983,7 @@ class Pipeline: |
982 | 983 | ||
983 | try: | 984 | try: |
984 | opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=", "only=", "maxcores=", | 985 | opts, _ = getopt.getopt(sys.argv[1:], "r:fhs", ["help", "resolution=", "3d-folder=", "seq-folder=", "keep-hetatm=", "only=", "maxcores=", |
985 | - "from-scratch", "full-inference", "no-homology", "ignore-issues", "extract", | 986 | + "from-scratch", "full-inference", "no-homology","redundant", "ignore-issues", "extract", |
986 | "all", "no-logs", "archive", "update-homologous", "version"]) | 987 | "all", "no-logs", "archive", "update-homologous", "version"]) |
987 | except getopt.GetoptError as err: | 988 | except getopt.GetoptError as err: |
988 | print(err) | 989 | print(err) |
... | @@ -1006,6 +1007,7 @@ class Pipeline: | ... | @@ -1006,6 +1007,7 @@ class Pipeline: |
1006 | print("--------------------------------------------------------------------------------------------------------------") | 1007 | print("--------------------------------------------------------------------------------------------------------------") |
1007 | print("-f [ --full-inference ]\t\tInfer new mappings even if Rfam already provides some. Yields more copies of" | 1008 | print("-f [ --full-inference ]\t\tInfer new mappings even if Rfam already provides some. Yields more copies of" |
1008 | "\n\t\t\t\t chains mapped to different families.") | 1009 | "\n\t\t\t\t chains mapped to different families.") |
1010 | + print("--redundant\t\t\t\tAlso store in the database the class members thought to be redundant for predictions.") | |
1009 | print("-s\t\t\t\tRun statistics computations after completion") | 1011 | print("-s\t\t\t\tRun statistics computations after completion") |
1010 | print("--extract\t\t\tExtract the portions of 3D RNA chains to individual mmCIF files.") | 1012 | print("--extract\t\t\tExtract the portions of 3D RNA chains to individual mmCIF files.") |
1011 | print("--keep-hetatm=False\t\t(True | False) Keep ions, waters and ligands in produced mmCIF files. " | 1013 | print("--keep-hetatm=False\t\t(True | False) Keep ions, waters and ligands in produced mmCIF files. " |
... | @@ -1103,6 +1105,8 @@ class Pipeline: | ... | @@ -1103,6 +1105,8 @@ class Pipeline: |
1103 | ncores = min(ncores, int(arg)) | 1105 | ncores = min(ncores, int(arg)) |
1104 | elif opt == "-f" or opt == "--full-inference": | 1106 | elif opt == "-f" or opt == "--full-inference": |
1105 | self.FULLINFERENCE = True | 1107 | self.FULLINFERENCE = True |
1108 | + elif opt=="--redundant": | ||
1109 | + self.REDUNDANT=True | ||
1106 | 1110 | ||
1107 | if self.HOMOLOGY and "tobedefinedbyoptions" in [path_to_3D_data, path_to_seq_data] or path_to_3D_data == "tobedefinedbyoptions": | 1111 | if self.HOMOLOGY and "tobedefinedbyoptions" in [path_to_3D_data, path_to_seq_data] or path_to_3D_data == "tobedefinedbyoptions": |
1108 | print("usage: RNANet.py --3d-folder path/where/to/store/chains --seq-folder path/where/to/store/alignments") | 1112 | print("usage: RNANet.py --3d-folder path/where/to/store/chains --seq-folder path/where/to/store/alignments") |
... | @@ -1151,7 +1155,8 @@ class Pipeline: | ... | @@ -1151,7 +1155,8 @@ class Pipeline: |
1151 | work_infer_mappings, | 1155 | work_infer_mappings, |
1152 | not self.REUSE_ALL, | 1156 | not self.REUSE_ALL, |
1153 | allmappings, | 1157 | allmappings, |
1154 | - self.FULLINFERENCE | 1158 | + self.FULLINFERENCE, |
1159 | + self.REDUNDANT | ||
1155 | ), | 1160 | ), |
1156 | full_structures_list, | 1161 | full_structures_list, |
1157 | chunksize=1)): | 1162 | chunksize=1)): |
... | @@ -1905,7 +1910,7 @@ def execute_joblist(fulljoblist): | ... | @@ -1905,7 +1910,7 @@ def execute_joblist(fulljoblist): |
1905 | return results | 1910 | return results |
1906 | 1911 | ||
1907 | @trace_unhandled_exceptions | 1912 | @trace_unhandled_exceptions |
1908 | -def work_infer_mappings(update_only, allmappings, fullinference, codelist) -> list: | 1913 | +def work_infer_mappings(update_only, allmappings, fullinference,redundant, codelist) -> list: |
1909 | """Given a list of PDB chains corresponding to an equivalence class from BGSU's NR list, | 1914 | """Given a list of PDB chains corresponding to an equivalence class from BGSU's NR list, |
1910 | build a list of Chain() objects mapped to Rfam families, by expanding available mappings | 1915 | build a list of Chain() objects mapped to Rfam families, by expanding available mappings |
1911 | of any element of the list to all the list elements. | 1916 | of any element of the list to all the list elements. |
... | @@ -1919,7 +1924,7 @@ def work_infer_mappings(update_only, allmappings, fullinference, codelist) -> li | ... | @@ -1919,7 +1924,7 @@ def work_infer_mappings(update_only, allmappings, fullinference, codelist) -> li |
1919 | # Split the comma-separated list of chain codes into chain codes: | 1924 | # Split the comma-separated list of chain codes into chain codes: |
1920 | eq_class = codelist[0] | 1925 | eq_class = codelist[0] |
1921 | codes = codelist[1].replace('+', ',').split(',') | 1926 | codes = codelist[1].replace('+', ',').split(',') |
1922 | - | 1927 | + representative=codelist[1].replace('+', ',').split(',')[0] |
1923 | # Search for mappings that apply to an element of this PDB chains list: | 1928 | # Search for mappings that apply to an element of this PDB chains list: |
1924 | for c in codes: | 1929 | for c in codes: |
1925 | # search for Rfam mappings with this chain c: | 1930 | # search for Rfam mappings with this chain c: |
... | @@ -2008,6 +2013,13 @@ def work_infer_mappings(update_only, allmappings, fullinference, codelist) -> li | ... | @@ -2008,6 +2013,13 @@ def work_infer_mappings(update_only, allmappings, fullinference, codelist) -> li |
2008 | 2013 | ||
2009 | # Now build Chain() objects for the mapped chains | 2014 | # Now build Chain() objects for the mapped chains |
2010 | for c in codes: | 2015 | for c in codes: |
2016 | + | ||
2017 | + if not redundant and c!=representative: | ||
2018 | + ''' | ||
2019 | + By default, save only the representative member of the class. | |
2020 | + If --redundant is passed, save all the chains of the class members. | |
2021 | + ''' | ||
2022 | + continue | ||
2011 | nr = c.split('|') | 2023 | nr = c.split('|') |
2012 | pdb_id = nr[0].lower() | 2024 | pdb_id = nr[0].lower() |
2013 | pdb_model = int(nr[1]) | 2025 | pdb_model = int(nr[1]) | ... | ... |
-
Please register or log in to post a comment