Showing 2 changed files with 11 additions and 13 deletions
... | @@ -553,14 +553,14 @@ class Chain: | ... | @@ -553,14 +553,14 @@ class Chain: |
553 | AND rfam_acc='{self.mapping.rfam_acc}' | 553 | AND rfam_acc='{self.mapping.rfam_acc}' |
554 | AND eq_class='{self.eq_class}';""")[0][0] | 554 | AND eq_class='{self.eq_class}';""")[0][0] |
555 | else: | 555 | else: |
556 | - sql_execute(conn, """INSERT INTO chain (structure_id, chain_name, rfam_acc, eq_class, issue) VALUES (?, ?, NULL, ?, ?) | 556 | + sql_execute(conn, """INSERT INTO chain (structure_id, chain_name, rfam_acc, eq_class, issue) VALUES (?, ?, 'unmappd', ?, ?) |
557 | ON CONFLICT(structure_id, chain_name, rfam_acc) DO UPDATE SET issue=excluded.issue, eq_class=excluded.eq_class;""", | 557 | ON CONFLICT(structure_id, chain_name, rfam_acc) DO UPDATE SET issue=excluded.issue, eq_class=excluded.eq_class;""", |
558 | data=(str(self.pdb_id), str(self.pdb_chain_id), str(self.eq_class), int(self.delete_me))) | 558 | data=(str(self.pdb_id), str(self.pdb_chain_id), str(self.eq_class), int(self.delete_me))) |
559 | self.db_chain_id = sql_ask_database(conn, f"""SELECT (chain_id) FROM chain | 559 | self.db_chain_id = sql_ask_database(conn, f"""SELECT (chain_id) FROM chain |
560 | WHERE structure_id='{self.pdb_id}' | 560 | WHERE structure_id='{self.pdb_id}' |
561 | AND chain_name='{self.pdb_chain_id}' | 561 | AND chain_name='{self.pdb_chain_id}' |
562 | AND eq_class='{self.eq_class}' | 562 | AND eq_class='{self.eq_class}' |
563 | - AND rfam_acc IS NULL;""")[0][0] | 563 | + AND rfam_acc = 'unmappd';""")[0][0] |
564 | 564 | ||
565 | # Add the nucleotides if the chain is not an issue | 565 | # Add the nucleotides if the chain is not an issue |
566 | if df is not None and not self.delete_me: # double condition is theoretically redundant here, but you never know | 566 | if df is not None and not self.delete_me: # double condition is theoretically redundant here, but you never know |
... | @@ -1193,7 +1193,7 @@ class Pipeline: | ... | @@ -1193,7 +1193,7 @@ class Pipeline: |
1193 | pdb_model = int(nr[1]) | 1193 | pdb_model = int(nr[1]) |
1194 | pdb_chain_id = nr[2].upper() | 1194 | pdb_chain_id = nr[2].upper() |
1195 | chain_label = f"{pdb_id}_{str(pdb_model)}_{pdb_chain_id}" | 1195 | chain_label = f"{pdb_id}_{str(pdb_model)}_{pdb_chain_id}" |
1196 | - res = sql_ask_database(conn, f"""SELECT chain_id from chain WHERE structure_id='{pdb_id}' AND chain_name='{pdb_chain_id}' AND rfam_acc IS NULL AND issue=0""") | 1196 | + res = sql_ask_database(conn, f"""SELECT chain_id from chain WHERE structure_id='{pdb_id}' AND chain_name='{pdb_chain_id}' AND rfam_acc = 'unmappd' AND issue=0""") |
1197 | if not len(res) or self.REUSE_ALL: # the chain is NOT yet in the database, or this is a known issue | 1197 | if not len(res) or self.REUSE_ALL: # the chain is NOT yet in the database, or this is a known issue |
1198 | self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class)) | 1198 | self.update.append(Chain(pdb_id, pdb_model, pdb_chain_id, chain_label, eq_class)) |
1199 | conn.close() | 1199 | conn.close() | ... | ... |
... | @@ -610,22 +610,22 @@ def general_stats(): | ... | @@ -610,22 +610,22 @@ def general_stats(): |
610 | with sqlite3.connect("results/RNANet.db") as conn: | 610 | with sqlite3.connect("results/RNANet.db") as conn: |
611 | df_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution | 611 | df_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution |
612 | FROM chain JOIN structure ON chain.structure_id = structure.pdb_id | 612 | FROM chain JOIN structure ON chain.structure_id = structure.pdb_id |
613 | - WHERE rfam_acc IS NULL AND ISSUE=0;""", conn) | 613 | + WHERE rfam_acc = 'unmappd' AND ISSUE=0;""", conn) |
614 | df_mapped_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution | 614 | df_mapped_unique = pd.read_sql(f"""SELECT distinct pdb_id, chain_name, exp_method, resolution |
615 | FROM chain JOIN structure ON chain.structure_id = structure.pdb_id | 615 | FROM chain JOIN structure ON chain.structure_id = structure.pdb_id |
616 | - WHERE rfam_acc IS NOT NULL AND ISSUE=0;""", conn) | 616 | + WHERE rfam_acc != 'unmappd' AND ISSUE=0;""", conn) |
617 | df_mapped_copies = pd.read_sql(f"""SELECT pdb_id, chain_name, inferred, rfam_acc, pdb_start, pdb_end, exp_method, resolution | 617 | df_mapped_copies = pd.read_sql(f"""SELECT pdb_id, chain_name, inferred, rfam_acc, pdb_start, pdb_end, exp_method, resolution |
618 | FROM chain JOIN structure ON chain.structure_id = structure.pdb_id | 618 | FROM chain JOIN structure ON chain.structure_id = structure.pdb_id |
619 | - WHERE rfam_acc IS NOT NULL AND ISSUE=0;""", conn) | 619 | + WHERE rfam_acc != 'unmappd' AND ISSUE=0;""", conn) |
620 | df_inferred_only_unique = pd.read_sql(f"""SELECT DISTINCT pdb_id, c.chain_name, exp_method, resolution | 620 | df_inferred_only_unique = pd.read_sql(f"""SELECT DISTINCT pdb_id, c.chain_name, exp_method, resolution |
621 | FROM (SELECT inferred, rfam_acc, pdb_start, pdb_end, chain.structure_id, chain.chain_name, r.redundancy, r.inf_redundancy | 621 | FROM (SELECT inferred, rfam_acc, pdb_start, pdb_end, chain.structure_id, chain.chain_name, r.redundancy, r.inf_redundancy |
622 | FROM chain | 622 | FROM chain |
623 | JOIN (SELECT structure_id, chain_name, COUNT(distinct rfam_acc) AS redundancy, SUM(inferred) AS inf_redundancy | 623 | JOIN (SELECT structure_id, chain_name, COUNT(distinct rfam_acc) AS redundancy, SUM(inferred) AS inf_redundancy |
624 | FROM chain | 624 | FROM chain |
625 | - WHERE rfam_acc IS NOT NULL AND issue=0 | 625 | + WHERE rfam_acc != 'unmappd' AND issue=0 |
626 | GROUP BY structure_id, chain_name | 626 | GROUP BY structure_id, chain_name |
627 | ) AS r ON chain.structure_id=r.structure_id AND chain.chain_name = r.chain_name | 627 | ) AS r ON chain.structure_id=r.structure_id AND chain.chain_name = r.chain_name |
628 | - WHERE r.redundancy=r.inf_redundancy AND rfam_acc IS NOT NULL and issue=0 | 628 | + WHERE r.redundancy=r.inf_redundancy AND rfam_acc != 'unmappd' and issue=0 |
629 | ) AS c | 629 | ) AS c |
630 | JOIN structure ON c.structure_id=structure.pdb_id;""", conn) | 630 | JOIN structure ON c.structure_id=structure.pdb_id;""", conn) |
631 | print("> found", len(df_inferred_only_unique.index), "chains which are mapped only by inference using BGSU NR Lists.") | 631 | print("> found", len(df_inferred_only_unique.index), "chains which are mapped only by inference using BGSU NR Lists.") |
... | @@ -775,9 +775,6 @@ def log_to_pbar(pbar): | ... | @@ -775,9 +775,6 @@ def log_to_pbar(pbar): |
775 | 775 | ||
776 | if __name__ == "__main__": | 776 | if __name__ == "__main__": |
777 | 777 | ||
778 | - general_stats() | ||
779 | - exit() | ||
780 | - | ||
781 | # parse options | 778 | # parse options |
782 | try: | 779 | try: |
783 | opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "resolution=", "3d-folder=", "seq-folder=" ]) | 780 | opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "resolution=", "3d-folder=", "seq-folder=" ]) |
... | @@ -839,8 +836,8 @@ if __name__ == "__main__": | ... | @@ -839,8 +836,8 @@ if __name__ == "__main__": |
839 | 836 | ||
840 | # Define the tasks | 837 | # Define the tasks |
841 | joblist = [] | 838 | joblist = [] |
842 | - joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), 4.0))) # res threshold is 4.0 Angstroms by default | 839 | + # joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), 4.0))) # res threshold is 4.0 Angstroms by default |
843 | - joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), 4.0))) # | 840 | + # joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), 4.0))) # |
844 | joblist.append(Job(function=stats_len)) # Computes figures | 841 | joblist.append(Job(function=stats_len)) # Computes figures |
845 | # joblist.append(Job(function=stats_freq)) # updates the database | 842 | # joblist.append(Job(function=stats_freq)) # updates the database |
846 | # for f in famlist: | 843 | # for f in famlist: |
... | @@ -873,3 +870,4 @@ if __name__ == "__main__": | ... | @@ -873,3 +870,4 @@ if __name__ == "__main__": |
873 | # per_chain_stats() | 870 | # per_chain_stats() |
874 | # seq_idty() | 871 | # seq_idty() |
875 | # stats_pairs() | 872 | # stats_pairs() |
873 | + general_stats() | ... | ... |
-
Please register or login to post a comment