Showing
1 changed file
with
25 additions
and
22 deletions
... | @@ -2978,8 +2978,9 @@ if __name__ == "__main__": | ... | @@ -2978,8 +2978,9 @@ if __name__ == "__main__": |
2978 | DELETE_OLD_DATA = False | 2978 | DELETE_OLD_DATA = False |
2979 | DO_WADLEY_ANALYSIS = False | 2979 | DO_WADLEY_ANALYSIS = False |
2980 | DO_AVG_DISTANCE_MATRIX = False | 2980 | DO_AVG_DISTANCE_MATRIX = False |
2981 | + DO_HIRE_RNA_MEASURES = False | ||
2981 | try: | 2982 | try: |
2982 | - opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", "3d-folder=", "seq-folder=" ]) | 2983 | + opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", "3d-folder=", "seq-folder=", "hire-rna=" ]) |
2983 | except getopt.GetoptError as err: | 2984 | except getopt.GetoptError as err: |
2984 | print(err) | 2985 | print(err) |
2985 | sys.exit(2) | 2986 | sys.exit(2) |
... | @@ -3000,6 +3001,7 @@ if __name__ == "__main__": | ... | @@ -3000,6 +3001,7 @@ if __name__ == "__main__": |
3000 | print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything") | 3001 | print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything") |
3001 | print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.") | 3002 | print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.") |
3002 | print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.") | 3003 | print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.") |
3004 | + print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.") | ||
3003 | 3005 | ||
3004 | sys.exit() | 3006 | sys.exit() |
3005 | elif opt == '--version': | 3007 | elif opt == '--version': |
... | @@ -3023,10 +3025,12 @@ if __name__ == "__main__": | ... | @@ -3023,10 +3025,12 @@ if __name__ == "__main__": |
3023 | DO_AVG_DISTANCE_MATRIX = True | 3025 | DO_AVG_DISTANCE_MATRIX = True |
3024 | elif opt=='--wadley': | 3026 | elif opt=='--wadley': |
3025 | DO_WADLEY_ANALYSIS = True | 3027 | DO_WADLEY_ANALYSIS = True |
3028 | + elif opt=='--hire-rna': | ||
3029 | + DO_HIRE_RNA_MEASURES = True | ||
3026 | 3030 | ||
3027 | 3031 | ||
3028 | # Load mappings. famlist will contain only families with structures at this resolution threshold. | 3032 | # Load mappings. famlist will contain only families with structures at this resolution threshold. |
3029 | - ''' | 3033 | + |
3030 | print("Loading mappings list...") | 3034 | print("Loading mappings list...") |
3031 | with sqlite3.connect(runDir + "/results/RNANet.db") as conn: | 3035 | with sqlite3.connect(runDir + "/results/RNANet.db") as conn: |
3032 | conn.execute('pragma journal_mode=wal') | 3036 | conn.execute('pragma journal_mode=wal') |
... | @@ -3050,7 +3054,7 @@ if __name__ == "__main__": | ... | @@ -3050,7 +3054,7 @@ if __name__ == "__main__": |
3050 | print(f"Found {len(famlist)} families with chains of resolution {res_thr}A or better.") | 3054 | print(f"Found {len(famlist)} families with chains of resolution {res_thr}A or better.") |
3051 | if len(ignored): | 3055 | if len(ignored): |
3052 | print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n') | 3056 | print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n') |
3053 | - ''' | 3057 | + |
3054 | if DELETE_OLD_DATA: | 3058 | if DELETE_OLD_DATA: |
3055 | for f in famlist: | 3059 | for f in famlist: |
3056 | subprocess.run(["rm","-f", runDir + f"/data/{f}.npy", runDir + f"/data/{f}_pairs.csv", runDir + f"/data/{f}_counts.csv"]) | 3060 | subprocess.run(["rm","-f", runDir + f"/data/{f}.npy", runDir + f"/data/{f}_pairs.csv", runDir + f"/data/{f}_counts.csv"]) |
... | @@ -3068,7 +3072,7 @@ if __name__ == "__main__": | ... | @@ -3068,7 +3072,7 @@ if __name__ == "__main__": |
3068 | 3072 | ||
3069 | # Define the tasks | 3073 | # Define the tasks |
3070 | joblist = [] | 3074 | joblist = [] |
3071 | - ''' | 3075 | + |
3072 | if n_unmapped_chains and DO_WADLEY_ANALYSIS: | 3076 | if n_unmapped_chains and DO_WADLEY_ANALYSIS: |
3073 | joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) | 3077 | joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) |
3074 | joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) | 3078 | joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) |
... | @@ -3090,15 +3094,20 @@ if __name__ == "__main__": | ... | @@ -3090,15 +3094,20 @@ if __name__ == "__main__": |
3090 | joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database | 3094 | joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database |
3091 | if f not in ignored: | 3095 | if f not in ignored: |
3092 | joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database | 3096 | joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database |
3093 | - ''' | ||
3094 | - | ||
3095 | 3097 | ||
3096 | - ''' | ||
3097 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] | 3098 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] |
3098 | - #exit() | 3099 | + for f in os.listdir(path_to_3D_data + "rna_only")[:100]: |
3099 | - for f in os.listdir(path_to_3D_data + "rna_only")[:100]: | ||
3100 | joblist.append(Job(function=dist_atoms, args=(f,))) | 3100 | joblist.append(Job(function=dist_atoms, args=(f,))) |
3101 | + | ||
3102 | + if DO_HIRE_RNA_MEASURES: | ||
3103 | + f_prec=os.listdir(path_to_3D_data + "rna_only")[0] | ||
3104 | + for f in os.listdir(path_to_3D_data + "rna_only")[:100]: | ||
3105 | + joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) | ||
3106 | + joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) | ||
3107 | + joblist.append(Job(function=angles_plans_hire_RNA, args=(f,))) | ||
3108 | + | ||
3101 | ''' | 3109 | ''' |
3110 | + Basepairs | ||
3102 | ''' | 3111 | ''' |
3103 | ld=os.listdir(path_to_3D_data +'datapoints')[:1000] | 3112 | ld=os.listdir(path_to_3D_data +'datapoints')[:1000] |
3104 | if '4zdo_1_E' in ld : | 3113 | if '4zdo_1_E' in ld : |
... | @@ -3109,19 +3118,9 @@ if __name__ == "__main__": | ... | @@ -3109,19 +3118,9 @@ if __name__ == "__main__": |
3109 | 3118 | ||
3110 | for cle in dictionnaire.keys(): | 3119 | for cle in dictionnaire.keys(): |
3111 | joblist.append(Job(function=parse, args=(cle,))) | 3120 | joblist.append(Job(function=parse, args=(cle,))) |
3112 | - ''' | ||
3113 | 3121 | ||
3114 | - #exit() | ||
3115 | - ''' | ||
3116 | - f_prec=os.listdir(path_to_3D_data + "rna_only")[0] | ||
3117 | - | ||
3118 | - for f in os.listdir(path_to_3D_data + "rna_only")[:100]: | ||
3119 | - joblist.append(Job(function=dist_atoms, args=(f,))) | ||
3120 | - joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) | ||
3121 | - joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) | ||
3122 | - joblist.append(Job(function=angles_plans_hire_RNA, args=(f,))) | ||
3123 | 3122 | ||
3124 | - ''' | 3123 | + #exit() |
3125 | 3124 | ||
3126 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | 3125 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) |
3127 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) | 3126 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) |
... | @@ -3153,8 +3152,8 @@ if __name__ == "__main__": | ... | @@ -3153,8 +3152,8 @@ if __name__ == "__main__": |
3153 | print() | 3152 | print() |
3154 | print() | 3153 | print() |
3155 | 3154 | ||
3156 | - | ||
3157 | # finish the work after the parallel portions | 3155 | # finish the work after the parallel portions |
3156 | + | ||
3158 | ''' | 3157 | ''' |
3159 | per_chain_stats() | 3158 | per_chain_stats() |
3160 | seq_idty() | 3159 | seq_idty() |
... | @@ -3162,6 +3161,10 @@ if __name__ == "__main__": | ... | @@ -3162,6 +3161,10 @@ if __name__ == "__main__": |
3162 | if n_unmapped_chains: | 3161 | if n_unmapped_chains: |
3163 | general_stats() | 3162 | general_stats() |
3164 | ''' | 3163 | ''' |
3164 | + | ||
3165 | + if DO_WADLEY_ANALYSIS: | ||
3166 | + graph_eta_theta() | ||
3167 | + | ||
3165 | concatenate('/results/all-atoms/distances/', 'dist_atoms.csv') | 3168 | concatenate('/results/all-atoms/distances/', 'dist_atoms.csv') |
3166 | graph_dist_atoms() | 3169 | graph_dist_atoms() |
3167 | concatenate('/results/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv') | 3170 | concatenate('/results/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv') |
... | @@ -3173,7 +3176,7 @@ if __name__ == "__main__": | ... | @@ -3173,7 +3176,7 @@ if __name__ == "__main__": |
3173 | graph_plans_h_RNA() | 3176 | graph_plans_h_RNA() |
3174 | 3177 | ||
3175 | graph_angles_torsion() | 3178 | graph_angles_torsion() |
3176 | - graph_eta_theta() | 3179 | + |
3177 | 3180 | ||
3178 | concatenate('/results/basepairs/', 'basepairs.csv') | 3181 | concatenate('/results/basepairs/', 'basepairs.csv') |
3179 | graph_basepairs() | 3182 | graph_basepairs() | ... | ... |
-
Please register or login to post a comment