Showing 1 changed file with 13 additions and 5 deletions
... | @@ -3,10 +3,8 @@ import os | ... | @@ -3,10 +3,8 @@ import os |
3 | import numpy as np | 3 | import numpy as np |
4 | import pandas as pd | 4 | import pandas as pd |
5 | import threading as th | 5 | import threading as th |
6 | -import seaborn as sb | ||
7 | import scipy.stats as st | 6 | import scipy.stats as st |
8 | import matplotlib.pyplot as plt | 7 | import matplotlib.pyplot as plt |
9 | -import pylab | ||
10 | import scipy.cluster.hierarchy as sch | 8 | import scipy.cluster.hierarchy as sch |
11 | from scipy.spatial.distance import squareform | 9 | from scipy.spatial.distance import squareform |
12 | from mpl_toolkits.mplot3d import axes3d | 10 | from mpl_toolkits.mplot3d import axes3d |
... | @@ -186,6 +184,8 @@ def stats_len(mappings_list, points): | ... | @@ -186,6 +184,8 @@ def stats_len(mappings_list, points): |
186 | plt.savefig("results/full_length_distribs.png") | 184 | plt.savefig("results/full_length_distribs.png") |
187 | 185 | ||
188 | def to_dist_matrix(f): | 186 | def to_dist_matrix(f): |
187 | + if path.isfile("data/"+f+".npy"): | ||
188 | + return 0 | ||
189 | print(f) | 189 | print(f) |
190 | dm = DistanceCalculator('identity') | 190 | dm = DistanceCalculator('identity') |
191 | with open(path_to_seq_data+"realigned/"+f+"++.afa") as al_file: | 191 | with open(path_to_seq_data+"realigned/"+f+"++.afa") as al_file: |
... | @@ -198,13 +198,21 @@ def to_dist_matrix(f): | ... | @@ -198,13 +198,21 @@ def to_dist_matrix(f): |
198 | return 0 | 198 | return 0 |
199 | 199 | ||
200 | def seq_idty(mappings_list): | 200 | def seq_idty(mappings_list): |
201 | + # compute distance matrices | ||
202 | + p = Pool(processes=8) | ||
203 | + pbar = tqdm(total=len(mappings_list.keys()), desc="RNA families", position=0, leave=True) | ||
204 | + for i, _ in enumerate(p.imap_unordered(to_dist_matrix, sorted(mappings_list.keys()))): | ||
205 | + pbar.update(1) | ||
206 | + pbar.close() | ||
207 | + p.close() | ||
208 | + p.join() | ||
209 | + | ||
210 | + # load them | ||
201 | fam_arrays = [] | 211 | fam_arrays = [] |
202 | for f in sorted(mappings_list.keys()): | 212 | for f in sorted(mappings_list.keys()): |
203 | if path.isfile("data/"+f+".npy"): | 213 | if path.isfile("data/"+f+".npy"): |
204 | fam_arrays.append(np.load("data/"+f+".npy")) | 214 | fam_arrays.append(np.load("data/"+f+".npy")) |
205 | else: | 215 | else: |
206 | - # to_dist_matrix(f) | ||
207 | - # fam_arrays.append(np.load("data/"+f+".npy")) | ||
208 | fam_arrays.append([]) | 216 | fam_arrays.append([]) |
209 | 217 | ||
210 | fig, axs = plt.subplots(11,7, figsize=(25,25)) | 218 | fig, axs = plt.subplots(11,7, figsize=(25,25)) |
... | @@ -289,4 +297,4 @@ if __name__ == "__main__": | ... | @@ -289,4 +297,4 @@ if __name__ == "__main__": |
289 | seq_idty(mappings_list) | 297 | seq_idty(mappings_list) |
290 | # stats_len(mappings_list, rna_points) | 298 | # stats_len(mappings_list, rna_points) |
291 | 299 | ||
292 | - | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
300 | + | ... | ... |
-
Please register or log in to post a comment