Louis BECQUEY

using sqlite instead of text files

This diff is collapsed. Click to expand it.
...@@ -17,7 +17,7 @@ from functools import partial ...@@ -17,7 +17,7 @@ from functools import partial
17 from multiprocessing import Pool 17 from multiprocessing import Pool
18 from os import path 18 from os import path
19 from collections import Counter 19 from collections import Counter
20 -from RNAnet import read_cpu_number 20 +from RNAnet import read_cpu_number, sql_ask_database
21 21
22 22
23 path_to_3D_data = "/nhome/siniac/lbecquey/Data/RNA/3D/" 23 path_to_3D_data = "/nhome/siniac/lbecquey/Data/RNA/3D/"
...@@ -182,17 +182,22 @@ def stats_len(mappings_list, points): ...@@ -182,17 +182,22 @@ def stats_len(mappings_list, points):
182 fig = plt.figure(figsize=(10,3)) 182 fig = plt.figure(figsize=(10,3))
183 ax = fig.gca() 183 ax = fig.gca()
184 ax.hist(lengths, bins=100, stacked=True, log=True, color=cols, label=sorted(mappings_list.keys())) 184 ax.hist(lengths, bins=100, stacked=True, log=True, color=cols, label=sorted(mappings_list.keys()))
185 - ax.set_xlabel("Sequence length (nucleotides)") 185 + ax.set_xlabel("Sequence length (nucleotides)", fontsize=8)
186 - ax.set_ylabel("Number of 3D chains") 186 + ax.set_ylabel("Number of 3D chains", fontsize=8)
187 + ax.set_xlim(left=-150)
188 + ax.tick_params(axis='both', which='both', labelsize=8)
187 fig.tight_layout() 189 fig.tight_layout()
188 - filtered_handles = [mpatches.Patch(color='red'), mpatches.Patch(color='white'), 190 + fig.subplots_adjust(right=0.78)
189 - mpatches.Patch(color='blue'), mpatches.Patch(color='white'), 191 + filtered_handles = [mpatches.Patch(color='red'), mpatches.Patch(color='white'), mpatches.Patch(color='white'), mpatches.Patch(color='white'),
192 + mpatches.Patch(color='blue'), mpatches.Patch(color='white'), mpatches.Patch(color='white'),
190 mpatches.Patch(color='green'), mpatches.Patch(color='purple'), 193 mpatches.Patch(color='green'), mpatches.Patch(color='purple'),
191 mpatches.Patch(color='orange'), mpatches.Patch(color='grey')] 194 mpatches.Patch(color='orange'), mpatches.Patch(color='grey')]
192 - filtered_labels = ['Large Ribosomal Subunits', '(RF02540, RF02541, RF02543)','Small Ribosomal Subunits','(RF01960, RF00177)', 195 + filtered_labels = ['Large Ribosomal Subunits', '(RF02540,', 'RF02541', 'RF02543)',
196 + 'Small Ribosomal Subunits','(RF01960,', 'RF00177)',
193 '5S rRNA (RF00001)', '5.8S rRNA (RF00002)', 'tRNA (RF00005)', 'Other'] 197 '5S rRNA (RF00001)', '5.8S rRNA (RF00002)', 'tRNA (RF00005)', 'Other']
194 - ax.legend(filtered_handles, filtered_labels, loc='best', ncol=2) 198 + ax.legend(filtered_handles, filtered_labels, loc='right',
195 - fig.savefig("results/lengths.png") 199 + ncol=1, fontsize='small', bbox_to_anchor=(1.3, 0.55))
200 + fig.savefig("results/figures/lengths.png")
196 # print("[3]\tComputed sequence length statistics and saved the figure.") 201 # print("[3]\tComputed sequence length statistics and saved the figure.")
197 202
198 def format_percentage(tot, x): 203 def format_percentage(tot, x):
...@@ -284,7 +289,7 @@ def stats_pairs(mappings_list, points): ...@@ -284,7 +289,7 @@ def stats_pairs(mappings_list, points):
284 ax = total_series.plot(figsize=(5,3), kind='bar', log=True, ylim=(1e4,5000000) ) 289 ax = total_series.plot(figsize=(5,3), kind='bar', log=True, ylim=(1e4,5000000) )
285 ax.set_ylabel("Number of observations") 290 ax.set_ylabel("Number of observations")
286 plt.subplots_adjust(bottom=0.2, right=0.99) 291 plt.subplots_adjust(bottom=0.2, right=0.99)
287 - plt.savefig("results/pairings.png") 292 + plt.savefig("results/figures/pairings.png")
288 293
289 # print("[5]\tComputed nucleotide statistics and saved CSV and PNG file.") 294 # print("[5]\tComputed nucleotide statistics and saved CSV and PNG file.")
290 295
...@@ -350,7 +355,7 @@ def seq_idty(mappings_list): ...@@ -350,7 +355,7 @@ def seq_idty(mappings_list):
350 fig.tight_layout() 355 fig.tight_layout()
351 fig.subplots_adjust(wspace=0.1, hspace=0.3) 356 fig.subplots_adjust(wspace=0.1, hspace=0.3)
352 fig.colorbar(im, ax=axs[-1], shrink=0.8) 357 fig.colorbar(im, ax=axs[-1], shrink=0.8)
353 - fig.savefig(f"results/distances.png") 358 + fig.savefig(f"results/figures/distances.png")
354 # print("[6]\tComputed identity matrices and saved the figure.") 359 # print("[6]\tComputed identity matrices and saved the figure.")
355 360
356 if __name__ == "__main__": 361 if __name__ == "__main__":
...@@ -389,12 +394,12 @@ if __name__ == "__main__": ...@@ -389,12 +394,12 @@ if __name__ == "__main__":
389 # Define threads for the tasks 394 # Define threads for the tasks
390 ################################################################# 395 #################################################################
391 threads = [ 396 threads = [
392 - th.Thread(target=reproduce_wadley_results, args=[rna_points], kwargs={'carbon': 1}), 397 + # th.Thread(target=reproduce_wadley_results, args=[rna_points], kwargs={'carbon': 1}),
393 - th.Thread(target=reproduce_wadley_results, args=[rna_points], kwargs={'carbon': 4}), 398 + # th.Thread(target=reproduce_wadley_results, args=[rna_points], kwargs={'carbon': 4}),
394 th.Thread(target=partial(stats_len, mappings_list), args=[rna_points]), 399 th.Thread(target=partial(stats_len, mappings_list), args=[rna_points]),
395 - th.Thread(target=partial(stats_freq, mappings_list), args=[rna_points]), 400 + # th.Thread(target=partial(stats_freq, mappings_list), args=[rna_points]),
396 - th.Thread(target=partial(stats_pairs, mappings_list), args=[rna_points]), 401 + # th.Thread(target=partial(stats_pairs, mappings_list), args=[rna_points]),
397 - th.Thread(target=seq_idty, args=[mappings_list]) 402 + # th.Thread(target=seq_idty, args=[mappings_list])
398 ] 403 ]
399 404
400 for t in threads: 405 for t in threads:
......