Showing
2 changed files
with
130 additions
and
99 deletions
... | @@ -1483,7 +1483,7 @@ class Pipeline: | ... | @@ -1483,7 +1483,7 @@ class Pipeline: |
1483 | os.makedirs(path_to_seq_data + "realigned/3D_only", exist_ok=True) | 1483 | os.makedirs(path_to_seq_data + "realigned/3D_only", exist_ok=True) |
1484 | subprocess.run(["cp", path_to_seq_data + "realigned/*_3d_only.afa", path_to_seq_data + "realigned/3d_only" ]) | 1484 | subprocess.run(["cp", path_to_seq_data + "realigned/*_3d_only.afa", path_to_seq_data + "realigned/3d_only" ]) |
1485 | subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_alignments_latest.tar.gz"]) | 1485 | subprocess.run(["rm", "-f", runDir + f"/archive/RNANET_alignments_latest.tar.gz"]) |
1486 | - subprocess.run(["tar", "-C", path_to_seq_data + "realigned/3d_only" , "-czf", runDir + f"/archive/RNANET_alignments_latest.tar.gz", "."]) | 1486 | + subprocess.run(["tar", "-C", path_to_seq_data + "realigned/3D_only" , "-czf", runDir + f"/archive/RNANET_alignments_latest.tar.gz", "."]) |
1487 | 1487 | ||
1488 | def sanitize_database(self): | 1488 | def sanitize_database(self): |
1489 | """Searches for issues in the database and correct them""" | 1489 | """Searches for issues in the database and correct them""" | ... | ... |
... | @@ -911,128 +911,150 @@ def general_stats(): | ... | @@ -911,128 +911,150 @@ def general_stats(): |
911 | fig.savefig(runDir + "/results/figures/Nfamilies.png") | 911 | fig.savefig(runDir + "/results/figures/Nfamilies.png") |
912 | plt.close() | 912 | plt.close() |
913 | 913 | ||
914 | -def get_matrix_euclidian_distance(cif_file, aligned_seq, consider_all_atoms): | 914 | +def par_distance_matrix(filelist, f, label, consider_all_atoms, s): |
915 | - """ | 915 | + |
916 | - This function | 916 | + # Identify the right 3D file |
917 | - - loads the coordinates and the alignment, reconctructs the alignment but with coordinates, considering gaps, and | 917 | + filename = '' |
918 | - - compute the matrix of euclidian distances. | 918 | + for file in filelist: |
919 | + if file.startswith(s.id.replace('-', '').replace('[', '_').replace(']', '_')): | ||
920 | + filename = path_to_3D_data + "rna_mapped_to_Rfam/" + file | ||
921 | + break | ||
922 | + if not len(filename): | ||
923 | + return None, None, None | ||
924 | + | ||
925 | + # Get the coordinates of every existing nt in the 3D file | ||
926 | + try: | ||
927 | + coordinates = nt_3d_centers(filename, consider_all_atoms) | ||
928 | + if not len(coordinates): | ||
929 | + # there is not nucleotides in the file, or no C1' atoms for example. | ||
930 | + warn("No C1' atoms in " + filename) | ||
931 | + return None, None, None | ||
932 | + except FileNotFoundError: | ||
933 | + return None, None, None | ||
919 | 934 | ||
920 | - Returns: | ||
921 | - The 2D np.array of euclidian distances between pairs of nucleotides, with np.NaNs in gap columns. | ||
922 | - """ | ||
923 | - # Load the baricenter coordinates | ||
924 | - coordinates = nt_3d_centers(cif_file, consider_all_atoms) | ||
925 | 935 | ||
926 | - # reconstruct the alignment but with coordinates | 936 | + # Get the coordinates of every position in the alignment |
927 | nb_gap = 0 | 937 | nb_gap = 0 |
928 | coordinates_with_gaps = [] | 938 | coordinates_with_gaps = [] |
929 | - for i in range(len(aligned_seq)): | 939 | + for i, letter in enumerate(s.seq): |
930 | - if aligned_seq[i] == '.' or aligned_seq[i] == '-': | 940 | + if letter in "-.": |
931 | - nb_gap = nb_gap + 1 | 941 | + nb_gap += 1 |
932 | - coordinates_with_gaps.append('NA') | 942 | + coordinates_with_gaps.append(np.nan) |
933 | else: | 943 | else: |
934 | coordinates_with_gaps.append(coordinates[i - nb_gap]) | 944 | coordinates_with_gaps.append(coordinates[i - nb_gap]) |
935 | - | ||
936 | - nb_nucleotides = len(coordinates_with_gaps) # number of nucleotides | ||
937 | - matrix = np.zeros((nb_nucleotides, nb_nucleotides)) # create a new empty matrix of size nxn | ||
938 | 945 | ||
939 | - # Fill this new matrix with the euclidians distances between all amino acids considering gaps: | 946 | + # Build the pairwise distances |
940 | - for i in range(nb_nucleotides): | 947 | + print("> Computing distances for", s.id) |
941 | - for j in range(nb_nucleotides): | 948 | + d = np.zeros((len(s.seq), len(s.seq)), dtype=np.float16) |
942 | - if coordinates_with_gaps[i] == 'NA' or coordinates_with_gaps[j] == 'NA': | 949 | + for i in range(len(s.seq)): |
943 | - matrix[i][j] = np.nan | 950 | + for j in range(len(s.seq)): |
951 | + if np.isnan(coordinates_with_gaps[i]).any() or np.isnan(coordinates_with_gaps[j]).any(): | ||
952 | + d[i,j] = np.nan | ||
944 | else: | 953 | else: |
945 | - matrix[i][j] = round(get_euclidian_distance(coordinates_with_gaps[i], coordinates_with_gaps[j]),3) | 954 | + d[i,j] = get_euclidian_distance(coordinates_with_gaps[i], coordinates_with_gaps[j]) |
946 | - return(matrix) | 955 | + |
956 | + print("> finished.") | ||
957 | + np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/'+ s.id.strip("\'") + '.csv', d, delimiter=",", fmt="%.3f") | ||
958 | + return 1-np.isnan(d).astype(int), np.nan_to_num(d), np.nan_to_num(d*d) | ||
947 | 959 | ||
948 | @trace_unhandled_exceptions | 960 | @trace_unhandled_exceptions |
949 | -def get_avg_std_distance_matrix(f, consider_all_atoms): | 961 | +def get_avg_std_distance_matrix(f, consider_all_atoms, multithread=False): |
950 | - # Get a worker number to position the progress bar | ||
951 | - global idxQueue | ||
952 | - thr_idx = idxQueue.get() | ||
953 | - | ||
954 | - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} {f} residue distance matrices") | ||
955 | - | ||
956 | if consider_all_atoms: | 962 | if consider_all_atoms: |
957 | label = "base" | 963 | label = "base" |
958 | else: | 964 | else: |
959 | label = "backbone" | 965 | label = "backbone" |
960 | 966 | ||
967 | + if not multithread: | ||
968 | + # This function call is for ONE worker. | ||
969 | + # Get a worker number for it to position the progress bar | ||
970 | + global idxQueue | ||
971 | + thr_idx = idxQueue.get() | ||
972 | + setproctitle(f"RNANet statistics.py Worker {thr_idx+1} {f} {label} distance matrices") | ||
973 | + | ||
961 | os.makedirs(runDir + '/results/distance_matrices/' + f + '_' + label, exist_ok=True ) | 974 | os.makedirs(runDir + '/results/distance_matrices/' + f + '_' + label, exist_ok=True ) |
962 | 975 | ||
963 | - | ||
964 | - family_matrices = [] | ||
965 | align = AlignIO.read(path_to_seq_data + f"realigned/{f}_3d_only.afa", "fasta") | 976 | align = AlignIO.read(path_to_seq_data + f"realigned/{f}_3d_only.afa", "fasta") |
977 | + ncols = align.get_alignment_length() | ||
978 | + counts = np.zeros((ncols, ncols)) | ||
979 | + avg = np.zeros((ncols, ncols)) | ||
980 | + std = np.zeros((ncols, ncols)) | ||
966 | found = 0 | 981 | found = 0 |
967 | notfound = 0 | 982 | notfound = 0 |
968 | - pbar = tqdm(total = len(align), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} {label} distance matrices", unit="chains", leave=False) | ||
969 | - pbar.update(0) | ||
970 | with sqlite3.connect(runDir + "/results/RNANet.db") as conn: | 983 | with sqlite3.connect(runDir + "/results/RNANet.db") as conn: |
971 | conn.execute('pragma journal_mode=wal') | 984 | conn.execute('pragma journal_mode=wal') |
972 | r = sql_ask_database(conn, f"SELECT structure_id, '_1_', chain_name, '_', CAST(pdb_start AS TEXT), '-', CAST(pdb_end AS TEXT) FROM chain WHERE rfam_acc='{f}';") | 985 | r = sql_ask_database(conn, f"SELECT structure_id, '_1_', chain_name, '_', CAST(pdb_start AS TEXT), '-', CAST(pdb_end AS TEXT) FROM chain WHERE rfam_acc='{f}';") |
973 | - filelist = [ ''.join(list(x))+'.cif' for x in r ] | 986 | + filelist = sorted([ ''.join(list(x))+'.cif' for x in r ]) |
974 | 987 | ||
975 | - for s in align: | 988 | + if not multithread: |
976 | - filename = '' | 989 | + pbar = tqdm(total = len(align), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} {label} distance matrices", unit="chains", leave=False) |
977 | - for file in filelist: | 990 | + pbar.update(0) |
978 | - if file.startswith(s.id.replace('-', '').replace('[', '_').replace(']', '_')): | 991 | + for s in align: |
979 | - filename = path_to_3D_data + "rna_mapped_to_Rfam/" + file | 992 | + contrib, d, dsquared = par_distance_matrix(filelist, f, label, consider_all_atoms, s) |
980 | - break | 993 | + if d is not None: |
981 | - if len(filename): | 994 | + found += 1 |
982 | - found += 1 | 995 | + counts += contrib |
983 | - try: | 996 | + avg += d |
984 | - euclidian_distance = get_matrix_euclidian_distance(filename, s.seq, consider_all_atoms) | 997 | + std += dsquared |
985 | - np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/'+ s.id.strip("\'") + '.csv', euclidian_distance, delimiter=",", fmt="%.3f") | 998 | + else: |
986 | - family_matrices.append(euclidian_distance) | ||
987 | - except FileNotFoundError: | ||
988 | - found -= 1 | ||
989 | notfound += 1 | 999 | notfound += 1 |
990 | - else: | 1000 | + pbar.update(1) |
991 | - notfound += 1 | 1001 | + pbar.close() |
992 | - pbar.update(1) | 1002 | + else: |
1003 | + # We split the work for one family on multiple workers. | ||
1004 | + | ||
1005 | + p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | ||
1006 | + try: | ||
1007 | + fam_pbar = tqdm(total=len(align), desc=f"{f} {label} pair distances", position=0, unit="chain", leave=True) | ||
1008 | + # Apply work_pssm_remap to each RNA family | ||
1009 | + for i, (contrib, d, dsquared) in enumerate(p.imap_unordered(partial(par_distance_matrix, filelist, f, label, consider_all_atoms), align, chunksize=1)): | ||
1010 | + if d is not None: | ||
1011 | + found += 1 | ||
1012 | + counts += contrib | ||
1013 | + avg += d | ||
1014 | + std += dsquared | ||
1015 | + else: | ||
1016 | + notfound += 1 | ||
1017 | + fam_pbar.update(1) | ||
1018 | + fam_pbar.close() | ||
1019 | + p.close() | ||
1020 | + p.join() | ||
1021 | + except KeyboardInterrupt: | ||
1022 | + warn("KeyboardInterrupt, terminating workers.", error=True) | ||
1023 | + fam_pbar.close() | ||
1024 | + p.terminate() | ||
1025 | + p.join() | ||
1026 | + exit(1) | ||
1027 | + | ||
993 | 1028 | ||
994 | # Calculation of the average matrix | 1029 | # Calculation of the average matrix |
995 | - avgarray = np.array(family_matrices) | 1030 | + avg = avg/counts |
996 | - if len(avgarray) == 0 or np.prod(avgarray.shape) == 0: | 1031 | + np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_average.csv' , avg, delimiter=",", fmt="%.3f") |
997 | - warn(f"Something's wrong with the shapes: {avgarray.shape}", error=True) | ||
998 | - with warnings.catch_warnings(): | ||
999 | - warnings.simplefilter("ignore", category=RuntimeWarning) | ||
1000 | - matrix_average_distances = np.nanmean(avgarray, axis=0 ) | ||
1001 | - | ||
1002 | - if len(matrix_average_distances) != 0: | ||
1003 | - matrix_average_distances = np.nan_to_num(matrix_average_distances) | ||
1004 | - np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_average.csv' , np.triu(matrix_average_distances), delimiter=",", fmt="%.3f") | ||
1005 | 1032 | ||
1006 | fig, ax = plt.subplots() | 1033 | fig, ax = plt.subplots() |
1007 | - im = ax.imshow(matrix_average_distances) | 1034 | + im = ax.imshow(avg) |
1008 | cbar = ax.figure.colorbar(im, ax=ax) | 1035 | cbar = ax.figure.colorbar(im, ax=ax) |
1009 | cbar.ax.set_ylabel("Angströms", rotation=-90, va="bottom") | 1036 | cbar.ax.set_ylabel("Angströms", rotation=-90, va="bottom") |
1010 | - ax.set_title("Average distance between residues (Angströms)") | 1037 | + ax.set_title(f"Average distance between {f} residues (Angströms)") |
1011 | fig.tight_layout() | 1038 | fig.tight_layout() |
1012 | fig.savefig(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_average.png', dpi=300) | 1039 | fig.savefig(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_average.png', dpi=300) |
1013 | plt.close() | 1040 | plt.close() |
1014 | 1041 | ||
1015 | - # Calculation of the standard deviation matrix | 1042 | + # Calculation of the standard deviation matrix by the Huygens theorem |
1016 | - with warnings.catch_warnings(): | 1043 | + std = np.sqrt(std/counts - np.power(avg, 2)) |
1017 | - warnings.simplefilter("ignore", category=RuntimeWarning) | 1044 | + np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_stdev.csv' , std, delimiter=",", fmt="%.3f") |
1018 | - matrix_standard_deviation_distances = np.nanstd(avgarray, axis=0 ) | ||
1019 | - | ||
1020 | - if len(matrix_standard_deviation_distances) != 0: | ||
1021 | - matrix_standard_deviation_distances = np.nan_to_num(matrix_standard_deviation_distances) | ||
1022 | - np.savetxt(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_stdev.csv' , np.triu(matrix_standard_deviation_distances), delimiter=",", fmt="%.3f") | ||
1023 | 1045 | ||
1024 | fig, ax = plt.subplots() | 1046 | fig, ax = plt.subplots() |
1025 | - im = ax.imshow(matrix_standard_deviation_distances) | 1047 | + im = ax.imshow(std) |
1026 | cbar = ax.figure.colorbar(im, ax=ax) | 1048 | cbar = ax.figure.colorbar(im, ax=ax) |
1027 | cbar.ax.set_ylabel("Angströms", rotation=-90, va="bottom") | 1049 | cbar.ax.set_ylabel("Angströms", rotation=-90, va="bottom") |
1028 | - ax.set_title("Average distance between residues (Angströms)") | 1050 | + ax.set_title(f"Standard deviation of distances between {f} residues (Angströms)") |
1029 | fig.tight_layout() | 1051 | fig.tight_layout() |
1030 | fig.savefig(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_std.png', dpi=300) | 1052 | fig.savefig(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_std.png', dpi=300) |
1031 | plt.close() | 1053 | plt.close() |
1032 | 1054 | ||
1033 | # Save log | 1055 | # Save log |
1034 | with open(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '.log', 'a') as logfile: | 1056 | with open(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '.log', 'a') as logfile: |
1035 | - logfile.write(str(found)+ " chains taken into account for computation. "+ str(notfound)+ " were not found.\n") | 1057 | + logfile.write(str(found)+ " chains taken into account for computation. "+ str(notfound)+ " were not found/without atoms.\n") |
1036 | 1058 | ||
1037 | # Save associated nucleotide frequencies (off-topic but convenient to do it here) | 1059 | # Save associated nucleotide frequencies (off-topic but convenient to do it here) |
1038 | with sqlite3.connect(runDir + "/results/RNANet.db") as conn: | 1060 | with sqlite3.connect(runDir + "/results/RNANet.db") as conn: |
... | @@ -1040,10 +1062,9 @@ def get_avg_std_distance_matrix(f, consider_all_atoms): | ... | @@ -1040,10 +1062,9 @@ def get_avg_std_distance_matrix(f, consider_all_atoms): |
1040 | df = pd.read_sql_query(f"SELECT freq_A, freq_C, freq_G, freq_U, freq_other, gap_percent, consensus FROM align_column WHERE rfam_acc = '{f}' AND index_ali > 0 ORDER BY index_ali ASC;", conn) | 1062 | df = pd.read_sql_query(f"SELECT freq_A, freq_C, freq_G, freq_U, freq_other, gap_percent, consensus FROM align_column WHERE rfam_acc = '{f}' AND index_ali > 0 ORDER BY index_ali ASC;", conn) |
1041 | df.to_csv(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_frequencies.csv', float_format="%.3f") | 1063 | df.to_csv(runDir + '/results/distance_matrices/' + f + '_'+ label + '/' + f + '_frequencies.csv', float_format="%.3f") |
1042 | 1064 | ||
1043 | - pbar.close() | 1065 | + if not multithread: |
1044 | - | 1066 | + idxQueue.put(thr_idx) # replace the thread index in the queue |
1045 | - idxQueue.put(thr_idx) # replace the thread index in the queue | 1067 | + setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") |
1046 | - setproctitle(f"RNANet statistics.py Worker {thr_idx+1} finished") | ||
1047 | return 0 | 1068 | return 0 |
1048 | 1069 | ||
1049 | def log_to_pbar(pbar): | 1070 | def log_to_pbar(pbar): |
... | @@ -1069,33 +1090,34 @@ def nt_3d_centers(cif_file, consider_all_atoms): | ... | @@ -1069,33 +1090,34 @@ def nt_3d_centers(cif_file, consider_all_atoms): |
1069 | """Return the nucleotides' coordinates, summarizing a nucleotide by only one point. | 1090 | """Return the nucleotides' coordinates, summarizing a nucleotide by only one point. |
1070 | If consider_all_atoms : barycentre is used | 1091 | If consider_all_atoms : barycentre is used |
1071 | else: C1' atom is the nucleotide | 1092 | else: C1' atom is the nucleotide |
1093 | + | ||
1094 | + Some chains have no C1' (e.g. 4v7f-3), therefore, an empty result is returned. | ||
1072 | """ | 1095 | """ |
1073 | result =[] | 1096 | result =[] |
1074 | structure = MMCIFParser().get_structure(cif_file, cif_file) | 1097 | structure = MMCIFParser().get_structure(cif_file, cif_file) |
1075 | 1098 | ||
1076 | - if consider_all_atoms == True: | 1099 | + for model in structure: |
1077 | - for model in structure: | 1100 | + for chain in model: |
1078 | - for chain in model: | 1101 | + for residue in chain: |
1079 | - for residue in chain: | 1102 | + if consider_all_atoms: |
1080 | temp_list = [] | 1103 | temp_list = [] |
1081 | - res_isobaricentre = 0 | ||
1082 | for atom in residue: | 1104 | for atom in residue: |
1083 | temp_list.append(atom.get_coord()) | 1105 | temp_list.append(atom.get_coord()) |
1084 | lg = len(temp_list) | 1106 | lg = len(temp_list) |
1085 | - | ||
1086 | summ = np.sum(temp_list, axis = 0) | 1107 | summ = np.sum(temp_list, axis = 0) |
1087 | res_isobaricentre = [summ[0]/lg, summ[1]/lg, summ[2]/lg] | 1108 | res_isobaricentre = [summ[0]/lg, summ[1]/lg, summ[2]/lg] |
1088 | result.append([res_isobaricentre[0], res_isobaricentre[1], res_isobaricentre[2]]) | 1109 | result.append([res_isobaricentre[0], res_isobaricentre[1], res_isobaricentre[2]]) |
1089 | - | 1110 | + else: |
1090 | - elif consider_all_atoms == False: | 1111 | + coordinates = None |
1091 | - for model in structure: | ||
1092 | - for chain in model: | ||
1093 | - for residue in chain: | ||
1094 | for atom in residue: | 1112 | for atom in residue: |
1095 | if atom.get_name() == "C1'": | 1113 | if atom.get_name() == "C1'": |
1096 | coordinates = atom.get_coord() | 1114 | coordinates = atom.get_coord() |
1097 | - res = [coordinates[0], coordinates[1], coordinates[2]] | 1115 | + if coordinates is None: |
1098 | - result.append(res) | 1116 | + # Residue has no C1' |
1117 | + res = np.nan | ||
1118 | + else: | ||
1119 | + res = [coordinates[0], coordinates[1], coordinates[2]] | ||
1120 | + result.append(res) | ||
1099 | return(result) | 1121 | return(result) |
1100 | 1122 | ||
1101 | def get_euclidian_distance(L1, L2): | 1123 | def get_euclidian_distance(L1, L2): |
... | @@ -1214,9 +1236,9 @@ if __name__ == "__main__": | ... | @@ -1214,9 +1236,9 @@ if __name__ == "__main__": |
1214 | e2 = file.split('_')[1] | 1236 | e2 = file.split('_')[1] |
1215 | e3 = file.split('_')[2] | 1237 | e3 = file.split('_')[2] |
1216 | extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3) | 1238 | extracted_chains.append(e1 + '[' + e2 + ']' + '-' + e3) |
1217 | - for f in famlist: | 1239 | + for f in [ x for x in famlist if (x not in LSU_set and x not in SSU_set) ]: # Process the rRNAs later only 3 by 3 |
1218 | - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True))) | 1240 | + joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, True, False))) |
1219 | - joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False))) | 1241 | + joblist.append(Job(function=get_avg_std_distance_matrix, args=(f, False, False))) |
1220 | joblist.append(Job(function=stats_len)) # Computes figures | 1242 | joblist.append(Job(function=stats_len)) # Computes figures |
1221 | joblist.append(Job(function=stats_freq)) # updates the database | 1243 | joblist.append(Job(function=stats_freq)) # updates the database |
1222 | for f in famlist: | 1244 | for f in famlist: |
... | @@ -1224,7 +1246,7 @@ if __name__ == "__main__": | ... | @@ -1224,7 +1246,7 @@ if __name__ == "__main__": |
1224 | if f not in ignored: | 1246 | if f not in ignored: |
1225 | joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database | 1247 | joblist.append(Job(function=to_id_matrix, args=(f,))) # updates the database |
1226 | 1248 | ||
1227 | - p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=int(0.7*nworkers)) | 1249 | + p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) |
1228 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) | 1250 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) |
1229 | 1251 | ||
1230 | try: | 1252 | try: |
... | @@ -1242,6 +1264,15 @@ if __name__ == "__main__": | ... | @@ -1242,6 +1264,15 @@ if __name__ == "__main__": |
1242 | except: | 1264 | except: |
1243 | print("Something went wrong") | 1265 | print("Something went wrong") |
1244 | 1266 | ||
1267 | + # Now process the memory-heavy tasks family by family | ||
1268 | + if DO_AVG_DISTANCE_MATRIX: | ||
1269 | + for f in LSU_set: | ||
1270 | + get_avg_std_distance_matrix(f, True, True) | ||
1271 | + get_avg_std_distance_matrix(f, False, True) | ||
1272 | + for f in SSU_set: | ||
1273 | + get_avg_std_distance_matrix(f, True, True) | ||
1274 | + get_avg_std_distance_matrix(f, False, True) | ||
1275 | + | ||
1245 | print() | 1276 | print() |
1246 | print() | 1277 | print() |
1247 | 1278 | ... | ... |
-
Please register or login to post a comment