Aglaé TABOT

reorganization and addition of options

...@@ -2978,8 +2978,9 @@ if __name__ == "__main__": ...@@ -2978,8 +2978,9 @@ if __name__ == "__main__":
2978 DELETE_OLD_DATA = False 2978 DELETE_OLD_DATA = False
2979 DO_WADLEY_ANALYSIS = False 2979 DO_WADLEY_ANALYSIS = False
2980 DO_AVG_DISTANCE_MATRIX = False 2980 DO_AVG_DISTANCE_MATRIX = False
2981 + DO_HIRE_RNA_MEASURES = False
2981 try: 2982 try:
2982 - opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", "3d-folder=", "seq-folder=" ]) 2983 + opts, _ = getopt.getopt( sys.argv[1:], "r:h", [ "help", "from-scratch", "wadley", "distance-matrices", "resolution=", "3d-folder=", "seq-folder=", "hire-rna=" ])
2983 except getopt.GetoptError as err: 2984 except getopt.GetoptError as err:
2984 print(err) 2985 print(err)
2985 sys.exit(2) 2986 sys.exit(2)
...@@ -3000,6 +3001,7 @@ if __name__ == "__main__": ...@@ -3000,6 +3001,7 @@ if __name__ == "__main__":
3000 print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything") 3001 print("--from-scratch\t\t\tDo not use precomputed results from past runs, recompute everything")
3001 print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.") 3002 print("--distance-matrices\t\tCompute average distance between nucleotide pairs for each family.")
3002 print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.") 3003 print("--wadley\t\t\tReproduce Wadley & al 2007 clustering of pseudotorsions.")
3004 + print("--hire-rna\t\t\tCompute distances between atoms and torsion angles for HiRE-RNA model, and plot GMMs on the data.")
3003 3005
3004 sys.exit() 3006 sys.exit()
3005 elif opt == '--version': 3007 elif opt == '--version':
...@@ -3023,10 +3025,12 @@ if __name__ == "__main__": ...@@ -3023,10 +3025,12 @@ if __name__ == "__main__":
3023 DO_AVG_DISTANCE_MATRIX = True 3025 DO_AVG_DISTANCE_MATRIX = True
3024 elif opt=='--wadley': 3026 elif opt=='--wadley':
3025 DO_WADLEY_ANALYSIS = True 3027 DO_WADLEY_ANALYSIS = True
3028 + elif opt=='--hire-rna':
3029 + DO_HIRE_RNA_MEASURES = True
3026 3030
3027 3031
3028 # Load mappings. famlist will contain only families with structures at this resolution threshold. 3032 # Load mappings. famlist will contain only families with structures at this resolution threshold.
3029 - ''' 3033 +
3030 print("Loading mappings list...") 3034 print("Loading mappings list...")
3031 with sqlite3.connect(runDir + "/results/RNANet.db") as conn: 3035 with sqlite3.connect(runDir + "/results/RNANet.db") as conn:
3032 conn.execute('pragma journal_mode=wal') 3036 conn.execute('pragma journal_mode=wal')
...@@ -3050,7 +3054,7 @@ if __name__ == "__main__": ...@@ -3050,7 +3054,7 @@ if __name__ == "__main__":
3050 print(f"Found {len(famlist)} families with chains of resolution {res_thr}A or better.") 3054 print(f"Found {len(famlist)} families with chains of resolution {res_thr}A or better.")
3051 if len(ignored): 3055 if len(ignored):
3052 print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n') 3056 print(f"Idty matrices: Ignoring {len(ignored)} families with only one chain:", " ".join(ignored)+'\n')
3053 - ''' 3057 +
3054 if DELETE_OLD_DATA: 3058 if DELETE_OLD_DATA:
3055 for f in famlist: 3059 for f in famlist:
3056 subprocess.run(["rm","-f", runDir + f"/data/{f}.npy", runDir + f"/data/{f}_pairs.csv", runDir + f"/data/{f}_counts.csv"]) 3060 subprocess.run(["rm","-f", runDir + f"/data/{f}.npy", runDir + f"/data/{f}_pairs.csv", runDir + f"/data/{f}_counts.csv"])
...@@ -3068,7 +3072,7 @@ if __name__ == "__main__": ...@@ -3068,7 +3072,7 @@ if __name__ == "__main__":
3068 3072
3069 # Define the tasks 3073 # Define the tasks
3070 joblist = [] 3074 joblist = []
3071 - ''' 3075 +
3072 if n_unmapped_chains and DO_WADLEY_ANALYSIS: 3076 if n_unmapped_chains and DO_WADLEY_ANALYSIS:
3073 joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr))) 3077 joblist.append(Job(function=reproduce_wadley_results, args=(1, False, (1,4), res_thr)))
3074 joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr))) 3078 joblist.append(Job(function=reproduce_wadley_results, args=(4, False, (1,4), res_thr)))
...@@ -3090,15 +3094,20 @@ if __name__ == "__main__": ...@@ -3090,15 +3094,20 @@ if __name__ == "__main__":
3090 joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database 3094 joblist.append(Job(function=parallel_stats_pairs, args=(f,))) # updates the database
3091 if f not in ignored: 3095 if f not in ignored:
3092 joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database 3096 joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database
3093 - '''
3094 -
3095 3097
3096 - '''
3097 f_prec=os.listdir(path_to_3D_data + "rna_only")[0] 3098 f_prec=os.listdir(path_to_3D_data + "rna_only")[0]
3098 - #exit() 3099 + for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
3099 - for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
3100 joblist.append(Job(function=dist_atoms, args=(f,))) 3100 joblist.append(Job(function=dist_atoms, args=(f,)))
3101 +
3102 + if DO_HIRE_RNA_MEASURES:
3103 + f_prec=os.listdir(path_to_3D_data + "rna_only")[0]
3104 + for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
3105 + joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,)))
3106 + joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,)))
3107 + joblist.append(Job(function=angles_plans_hire_RNA, args=(f,)))
3108 +
3101 ''' 3109 '''
3110 + Basepairs
3102 ''' 3111 '''
3103 ld=os.listdir(path_to_3D_data +'datapoints')[:1000] 3112 ld=os.listdir(path_to_3D_data +'datapoints')[:1000]
3104 if '4zdo_1_E' in ld : 3113 if '4zdo_1_E' in ld :
...@@ -3109,19 +3118,9 @@ if __name__ == "__main__": ...@@ -3109,19 +3118,9 @@ if __name__ == "__main__":
3109 3118
3110 for cle in dictionnaire.keys(): 3119 for cle in dictionnaire.keys():
3111 joblist.append(Job(function=parse, args=(cle,))) 3120 joblist.append(Job(function=parse, args=(cle,)))
3112 - '''
3113 3121
3114 - #exit()
3115 - '''
3116 - f_prec=os.listdir(path_to_3D_data + "rna_only")[0]
3117 -
3118 - for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
3119 - joblist.append(Job(function=dist_atoms, args=(f,)))
3120 - joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,)))
3121 - joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,)))
3122 - joblist.append(Job(function=angles_plans_hire_RNA, args=(f,)))
3123 3122
3124 - ''' 3123 + #exit()
3125 3124
3126 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) 3125 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers)
3127 pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) 3126 pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True)
...@@ -3153,8 +3152,8 @@ if __name__ == "__main__": ...@@ -3153,8 +3152,8 @@ if __name__ == "__main__":
3153 print() 3152 print()
3154 print() 3153 print()
3155 3154
3156 -
3157 # finish the work after the parallel portions 3155 # finish the work after the parallel portions
3156 +
3158 ''' 3157 '''
3159 per_chain_stats() 3158 per_chain_stats()
3160 seq_idty() 3159 seq_idty()
...@@ -3162,6 +3161,10 @@ if __name__ == "__main__": ...@@ -3162,6 +3161,10 @@ if __name__ == "__main__":
3162 if n_unmapped_chains: 3161 if n_unmapped_chains:
3163 general_stats() 3162 general_stats()
3164 ''' 3163 '''
3164 +
3165 + if DO_WADLEY_ANALYSIS:
3166 + graph_eta_theta()
3167 +
3165 concatenate('/results/all-atoms/distances/', 'dist_atoms.csv') 3168 concatenate('/results/all-atoms/distances/', 'dist_atoms.csv')
3166 graph_dist_atoms() 3169 graph_dist_atoms()
3167 concatenate('/results/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv') 3170 concatenate('/results/HiRE-RNA/distances/', 'dist_atoms_hire_RNA.csv')
...@@ -3173,7 +3176,7 @@ if __name__ == "__main__": ...@@ -3173,7 +3176,7 @@ if __name__ == "__main__":
3173 graph_plans_h_RNA() 3176 graph_plans_h_RNA()
3174 3177
3175 graph_angles_torsion() 3178 graph_angles_torsion()
3176 - graph_eta_theta() 3179 +
3177 3180
3178 concatenate('/results/basepairs/', 'basepairs.csv') 3181 concatenate('/results/basepairs/', 'basepairs.csv')
3179 graph_basepairs() 3182 graph_basepairs()
......