Showing
1 changed file
with
342 additions
and
12 deletions
... | @@ -17,6 +17,7 @@ import scipy.cluster.hierarchy as sch | ... | @@ -17,6 +17,7 @@ import scipy.cluster.hierarchy as sch |
17 | import sklearn | 17 | import sklearn |
18 | import json | 18 | import json |
19 | import pickle | 19 | import pickle |
20 | +import Bio | ||
20 | from scipy.spatial.distance import squareform | 21 | from scipy.spatial.distance import squareform |
21 | from mpl_toolkits.mplot3d import axes3d | 22 | from mpl_toolkits.mplot3d import axes3d |
22 | from Bio import AlignIO, SeqIO | 23 | from Bio import AlignIO, SeqIO |
... | @@ -1639,7 +1640,7 @@ def dist_atoms_hire_RNA (f) : | ... | @@ -1639,7 +1640,7 @@ def dist_atoms_hire_RNA (f) : |
1639 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} dist_atoms_hire_RNA({f})") | 1640 | setproctitle(f"RNANet statistics.py Worker {thr_idx+1} dist_atoms_hire_RNA({f})") |
1640 | 1641 | ||
1641 | parser=MMCIFParser() | 1642 | parser=MMCIFParser() |
1642 | - s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f)) | 1643 | + s = parser.get_structure(name, os.path.abspath(path_to_3D_data +"rna_only/" + f)) |
1643 | chain = next(s[0].get_chains()) | 1644 | chain = next(s[0].get_chains()) |
1644 | residues=list(chain.get_residues()) | 1645 | residues=list(chain.get_residues()) |
1645 | pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} dist_atoms_hire_RNA", unit="residu", leave=False) | 1646 | pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} dist_atoms_hire_RNA", unit="residu", leave=False) |
... | @@ -1800,7 +1801,7 @@ def angles_torsion_hire_RNA(f): | ... | @@ -1800,7 +1801,7 @@ def angles_torsion_hire_RNA(f): |
1800 | os.makedirs(runDir+"/results/HiRE-RNA/torsions/", exist_ok=True) | 1801 | os.makedirs(runDir+"/results/HiRE-RNA/torsions/", exist_ok=True) |
1801 | 1802 | ||
1802 | parser=MMCIFParser() | 1803 | parser=MMCIFParser() |
1803 | - s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f)) | 1804 | + s = parser.get_structure(name, os.path.abspath(path_to_3D_data + "rna_only/" + f)) |
1804 | chain = next(s[0].get_chains()) | 1805 | chain = next(s[0].get_chains()) |
1805 | residues=list(chain.get_residues()) | 1806 | residues=list(chain.get_residues()) |
1806 | pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False) | 1807 | pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False) |
... | @@ -1926,7 +1927,7 @@ def angles_plans_hire_RNA(f): | ... | @@ -1926,7 +1927,7 @@ def angles_plans_hire_RNA(f): |
1926 | os.makedirs(runDir+"/results/HiRE-RNA/angles/", exist_ok=True) | 1927 | os.makedirs(runDir+"/results/HiRE-RNA/angles/", exist_ok=True) |
1927 | 1928 | ||
1928 | parser=MMCIFParser() | 1929 | parser=MMCIFParser() |
1929 | - s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f)) | 1930 | + s = parser.get_structure(name, os.path.abspath(path_to_3D_data + "rna_only/" + f)) |
1930 | chain = next(s[0].get_chains()) | 1931 | chain = next(s[0].get_chains()) |
1931 | residues=list(chain.get_residues()) | 1932 | residues=list(chain.get_residues()) |
1932 | pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False) | 1933 | pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False) |
... | @@ -2045,6 +2046,7 @@ def GMM_histo(data, name_data, x, y, nb_fichiers) : | ... | @@ -2045,6 +2046,7 @@ def GMM_histo(data, name_data, x, y, nb_fichiers) : |
2045 | def GMM_histo_without_saving(data, name_data, x, y, nb_fichiers) : | 2046 | def GMM_histo_without_saving(data, name_data, x, y, nb_fichiers) : |
2046 | ''' | 2047 | ''' |
2047 | Plot Gaussian-Mixture-Model on histograms | 2048 | Plot Gaussian-Mixture-Model on histograms |
2049 | + But without saving and close the figure | ||
2048 | ''' | 2050 | ''' |
2049 | histogram(data, name_data, x, y, nb_fichiers)#plot the histogram | 2051 | histogram(data, name_data, x, y, nb_fichiers)#plot the histogram |
2050 | 2052 | ||
... | @@ -2564,6 +2566,7 @@ def dictio(ld): | ... | @@ -2564,6 +2566,7 @@ def dictio(ld): |
2564 | key = pdb identifier of the structure | 2566 | key = pdb identifier of the structure |
2565 | value = list of RNA chains | 2567 | value = list of RNA chains |
2566 | ''' | 2568 | ''' |
2569 | + dictionnaire=dict() | ||
2567 | pdb=[] | 2570 | pdb=[] |
2568 | for f in ld: | 2571 | for f in ld: |
2569 | pdb_id=str.split(f, '_')[0] | 2572 | pdb_id=str.split(f, '_')[0] |
... | @@ -2648,20 +2651,336 @@ def graphe(type_LW, angle_1, angle_2, angle_3, angle_4, distance, nb): | ... | @@ -2648,20 +2651,336 @@ def graphe(type_LW, angle_1, angle_2, angle_3, angle_4, distance, nb): |
2648 | 2651 | ||
2649 | plt.subplot(2, 1, 1) | 2652 | plt.subplot(2, 1, 1) |
2650 | 2653 | ||
2651 | - GMM_tot(angle_1, "C4'-C1'-B1", nb, 'cyan' ) | 2654 | + if len(angle_1) > 0 : |
2652 | - GMM_tot(angle_2, "C1'-B1-B1pair", nb, 'magenta') | 2655 | + GMM_tot(angle_1, "C4'-C1'-B1", nb, 'cyan' ) |
2653 | - GMM_tot(angle_3, "B1-B1pair-C1'pair", nb, "yellow") | 2656 | + if len(angle_2) > 0 : |
2654 | - GMM_tot(angle_4, "B1pair-C1'pair-C4'pair", nb, 'olive') | 2657 | + GMM_tot(angle_2, "C1'-B1-B1pair", nb, 'magenta') |
2658 | + if len(angle_3) > 0 : | ||
2659 | + GMM_tot(angle_3, "B1-B1pair-C1'pair", nb, "yellow") | ||
2660 | + if len(angle_4) > 0 : | ||
2661 | + GMM_tot(angle_4, "B1pair-C1'pair-C4'pair", nb, 'olive') | ||
2655 | plt.xlabel("Angle(degré)") | 2662 | plt.xlabel("Angle(degré)") |
2656 | plt.title("GMM des angles plans pour les appariements " +type_LW +" (" +str(nb)+ " valeurs)", fontsize=10) | 2663 | plt.title("GMM des angles plans pour les appariements " +type_LW +" (" +str(nb)+ " valeurs)", fontsize=10) |
2657 | 2664 | ||
2658 | plt.subplot(2, 1, 2) | 2665 | plt.subplot(2, 1, 2) |
2659 | - GMM_histo_without_saving(distance, "Distance pointes " + type_LW, "Distance (Angström)", "Densité", nb) | 2666 | + if len(distance)>0 : |
2667 | + GMM_histo_without_saving(distance, "Distance pointes " + type_LW, "Distance (Angström)", "Densité", nb) | ||
2660 | 2668 | ||
2661 | plt.savefig("Mesures appariements " +type_LW+ " (" +str(nb)+ " valeurs).png" ) | 2669 | plt.savefig("Mesures appariements " +type_LW+ " (" +str(nb)+ " valeurs).png" ) |
2662 | plt.close() | 2670 | plt.close() |
2663 | 2671 | ||
2672 | +def appariements(chain, df): | ||
2673 | + ''' | ||
2674 | + Cleanup of the dataset | ||
2675 | + measurements of distances and angles between paired nucleotides in the chain | ||
2676 | + ''' | ||
2677 | + liste_dist=[] | ||
2678 | + warnings.simplefilter(action="ignore", category=SettingWithCopyWarning) | ||
2679 | + | ||
2680 | + pairs=df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep | ||
2681 | + for i in range(pairs.shape[0]): #we remove the lines where no pairing (NaN in paired) | ||
2682 | + index_with_nan=pairs.index[pairs.iloc[:,2].isnull()] | ||
2683 | + pairs.drop(index_with_nan, 0, inplace=True) | ||
2684 | + | ||
2685 | + paired_int=[] | ||
2686 | + for i in pairs.index:# convert values from paired to integers or lists of integers | ||
2687 | + paired=pairs.at[i, 'paired'] | ||
2688 | + if type(paired) is np.int64 or type(paired) is np.float64: | ||
2689 | + paired_int.append(int(paired)) | ||
2690 | + else : #strings | ||
2691 | + if len(paired)<3 : #a single pairing | ||
2692 | + paired_int.append(int(paired)) | ||
2693 | + else : #several pairings | ||
2694 | + paired=paired.split(',') | ||
2695 | + l=[int(i) for i in paired] | ||
2696 | + paired_int.append(l) | ||
2697 | + | ||
2698 | + pair_type_LW_bis=[] | ||
2699 | + for j in pairs.index: | ||
2700 | + pair_type_LW = pairs.at[j, 'pair_type_LW'] | ||
2701 | + if len(pair_type_LW)<4 : #a single pairing | ||
2702 | + pair_type_LW_bis.append(pair_type_LW) | ||
2703 | + else : #several pairings | ||
2704 | + pair_type_LW=pair_type_LW.split(',') | ||
2705 | + l=[i for i in pair_type_LW] | ||
2706 | + pair_type_LW_bis.append(pair_type_LW) | ||
2707 | + | ||
2708 | + #addition of these new columns | ||
2709 | + pairs.insert(4, "paired_int", paired_int, True) | ||
2710 | + pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True) | ||
2711 | + | ||
2712 | + indexNames=pairs[pairs['paired_int'] == 0].index | ||
2713 | + pairs.drop(indexNames, inplace=True)#deletion of lines with a 0 in paired_int (matching to another RNA chain) | ||
2714 | + | ||
2715 | + for i in pairs.index: | ||
2716 | + ''' | ||
2717 | + calculations for each row of the pairs dataset | ||
2718 | + ''' | ||
2719 | + resseq=pairs.at[i, 'old_nt_resnum'] #number of the residue in the chain | ||
2720 | + #code to delete letters in old_nt_resnum | ||
2721 | + icode_res=' ' | ||
2722 | + if type(resseq) is str: | ||
2723 | + if resseq[0] != '-' : | ||
2724 | + while resseq.isdigit() is False: | ||
2725 | + l=len(resseq) | ||
2726 | + if icode_res==' ': | ||
2727 | + icode_res=resseq[l-1] | ||
2728 | + else : | ||
2729 | + icode_res=resseq[l-1]+icode_res | ||
2730 | + resseq=resseq[:l-1] | ||
2731 | + resseq=int(resseq) | ||
2732 | + index=pairs.at[i, 'index_chain'] | ||
2733 | + type_LW=pairs.at[i, 'pair_type_LW_bis'] #pairing type | ||
2734 | + num_paired=pairs.at[i, 'paired_int'] #number (index_chain) of the paired nucleotide | ||
2735 | + | ||
2736 | + | ||
2737 | + if type(num_paired) is int or type(num_paired) is np.int64: | ||
2738 | + l=pairs[pairs['index_chain']==num_paired].index.to_list() | ||
2739 | + | ||
2740 | + resnum=pairs.at[l[0], 'old_nt_resnum'] | ||
2741 | + icode_pair=' ' | ||
2742 | + if type(resnum) is str: | ||
2743 | + if resnum[0] != '-' : | ||
2744 | + while resnum.isdigit() is False: | ||
2745 | + l=len(resnum) | ||
2746 | + if icode_pair==' ': | ||
2747 | + icode_pair=resnum[l-1] | ||
2748 | + else : | ||
2749 | + icode_pair=resnum[l-1]+icode_pair | ||
2750 | + resnum=resnum[:l-1] | ||
2751 | + | ||
2752 | + resnum=int(resnum) | ||
2753 | + try : | ||
2754 | + d=dist_pointes(chain[(' ',resseq, icode_res)], chain[(' ', resnum, icode_pair)]) #calculation of the distance between the tips of the paired nucleotides | ||
2755 | + angle=angle_c1_b1(chain[(' ', resseq, icode_res)], chain[(' ', resnum, icode_pair)]) | ||
2756 | + if d != 0.0: | ||
2757 | + liste_dist.append([chain, type_LW, resseq, resnum, d, angle[0], angle[1], angle[2], angle[3]]) | ||
2758 | + except : | ||
2759 | + pass | ||
2760 | + else : | ||
2761 | + for j in range(len(num_paired)): #if several pairings, process them one by one | ||
2762 | + if num_paired[j] != 0 : | ||
2763 | + l=pairs[pairs['index_chain']==num_paired[j]].index.to_list() | ||
2764 | + | ||
2765 | + resnum=pairs.at[l[0], 'old_nt_resnum'] | ||
2766 | + | ||
2767 | + icode_pair=' ' | ||
2768 | + if type(resnum) is str: | ||
2769 | + if resnum[0] != '-' : | ||
2770 | + while resnum.isdigit() is False: | ||
2771 | + l=len(resnum) | ||
2772 | + if icode_pair==' ': | ||
2773 | + icode_pair=resnum[l-1] | ||
2774 | + else : | ||
2775 | + icode_pair=resnum[l-1]+icode_pair | ||
2776 | + resnum=resnum[:l-1] | ||
2777 | + resnum=int(resnum) | ||
2778 | + try : | ||
2779 | + d=dist_pointes(chain[(' ', resseq, icode_res)], chain[(' ', resnum, icode_pair)]) | ||
2780 | + angle=angle_c1_b1(chain[(' ', resseq, icode_res)], chain[(' ', resnum, icode_pair)]) | ||
2781 | + if d != 0.0: | ||
2782 | + liste_dist.append([chain, type_LW[j], resseq, resnum, d, angle[0], angle[1], angle[2], angle[3]]) | ||
2783 | + except: | ||
2784 | + pass | ||
2785 | + | ||
2786 | + return(liste_dist) | ||
2787 | + | ||
2788 | +def parse(cle): | ||
2789 | + l=[] | ||
2790 | + os.makedirs(runDir + "/results/basepairs/", exist_ok=True) | ||
2791 | + with warnings.catch_warnings(): | ||
2792 | + # Ignore the PDB problems. This mostly warns that some chain is discontinuous. | ||
2793 | + warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning) | ||
2794 | + warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning) | ||
2795 | + parser=MMCIFParser() | ||
2796 | + s = parser.get_structure(cle, os.path.abspath(path_to_3D_data + "RNAcifs/" + cle +".cif")) | ||
2797 | + for model in s: | ||
2798 | + for valeur in dictionnaire[cle]: | ||
2799 | + if len(valeur)>2: | ||
2800 | + df_tot=[] | ||
2801 | + for id_chain in valeur: | ||
2802 | + for data in ld: | ||
2803 | + if (len(data)<10): #unmapped | ||
2804 | + chaine=str.split(data, '_') | ||
2805 | + if chaine[0]==cle and chaine[2]==id_chain: | ||
2806 | + df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + data)) | ||
2807 | + if df['index_chain'][0]==1:#ignore files with numbering errors | ||
2808 | + l=appariements(model[id_chain], df) | ||
2809 | + else : | ||
2810 | + for data in ld: | ||
2811 | + if (len(data)<10): #unmapped | ||
2812 | + chaine=str.split(data, '_') | ||
2813 | + if chaine[0]==cle and chaine[2]==valeur: | ||
2814 | + df=pd.read_csv(os.path.abspath(path_to_3D_data + "datapoints/" + data)) | ||
2815 | + if df['index_chain'][0]==1: | ||
2816 | + l=appariements(model[valeur], df) | ||
2817 | + df_calc=pd.DataFrame(l, columns=["Chaine", "type LW", "Resseq", "Num paired", "Distance", "C4'-C1'-B1", "C1'-B1-B1pair", "B1-B1pair-C1'pair", "B1pair-C1'pair-C4'pair"]) | ||
2818 | + df_calc.to_csv(runDir + "/results/basepairs/"+'basepairs '+cle+'_'+valeur+'.csv') | ||
2819 | + | ||
2820 | +def graph_basepairs(): | ||
2821 | + df=pd.read_csv(os.path.abspath(runDir + "/results/basepairs/basepairs.csv")) | ||
2822 | + | ||
2823 | + cWW=df[df['type LW']=='cWW'] | ||
2824 | + cWW_dist=list(cWW["Distance"]) | ||
2825 | + cWW_angle_1=list(cWW["C4'-C1'-B1"]) | ||
2826 | + cWW_angle_2=list(cWW["C1'-B1-B1pair"]) | ||
2827 | + cWW_angle_3=list(cWW["B1-B1pair-C1'pair"]) | ||
2828 | + cWW_angle_4=list(cWW["B1pair-C1'pair-C4'pair"]) | ||
2829 | + tWW=df[df['type LW']=='tWW'] | ||
2830 | + tWW_dist=list(tWW["Distance"]) | ||
2831 | + tWW_angle_1=list(tWW["C4'-C1'-B1"]) | ||
2832 | + tWW_angle_2=list(tWW["C1'-B1-B1pair"]) | ||
2833 | + tWW_angle_3=list(tWW["B1-B1pair-C1'pair"]) | ||
2834 | + tWW_angle_4=list(tWW["B1pair-C1'pair-C4'pair"]) | ||
2835 | + cWH=df[df['type LW']=='cWH'] | ||
2836 | + cWH_dist=list(cWH["Distance"]) | ||
2837 | + cWH_angle_1=list(cWH["C4'-C1'-B1"]) | ||
2838 | + cWH_angle_2=list(cWH["C1'-B1-B1pair"]) | ||
2839 | + cWH_angle_3=list(cWH["B1-B1pair-C1'pair"]) | ||
2840 | + cWH_angle_4=list(cWH["B1pair-C1'pair-C4'pair"]) | ||
2841 | + tWH=df[df['type LW']=='tWH'] | ||
2842 | + tWH_dist=list(tWH["Distance"]) | ||
2843 | + tWH_angle_1=list(tWH["C4'-C1'-B1"]) | ||
2844 | + tWH_angle_2=list(tWH["C1'-B1-B1pair"]) | ||
2845 | + tWH_angle_3=list(tWH["B1-B1pair-C1'pair"]) | ||
2846 | + tWH_angle_4=list(tWH["B1pair-C1'pair-C4'pair"]) | ||
2847 | + cHW=df[df['type LW']=='cHW'] | ||
2848 | + cHW_dist=list(cHW["Distance"]) | ||
2849 | + cHW_angle_1=list(cHW["C4'-C1'-B1"]) | ||
2850 | + cHW_angle_2=list(cHW["C1'-B1-B1pair"]) | ||
2851 | + cHW_angle_3=list(cHW["B1-B1pair-C1'pair"]) | ||
2852 | + cHW_angle_4=list(cHW["B1pair-C1'pair-C4'pair"]) | ||
2853 | + tHW=df[df['type LW']=='tHW'] | ||
2854 | + tHW_dist=list(tHW["Distance"]) | ||
2855 | + tHW_angle_1=list(tHW["C4'-C1'-B1"]) | ||
2856 | + tHW_angle_2=list(tHW["C1'-B1-B1pair"]) | ||
2857 | + tHW_angle_3=list(tHW["B1-B1pair-C1'pair"]) | ||
2858 | + tHW_angle_4=list(tHW["B1pair-C1'pair-C4'pair"]) | ||
2859 | + cWS=df[df['type LW']=='cWS'] | ||
2860 | + cWS_dist=list(cWS["Distance"]) | ||
2861 | + cWS_angle_1=list(cWS["C4'-C1'-B1"]) | ||
2862 | + cWS_angle_2=list(cWS["C1'-B1-B1pair"]) | ||
2863 | + cWS_angle_3=list(cWS["B1-B1pair-C1'pair"]) | ||
2864 | + cWS_angle_4=list(cWS["B1pair-C1'pair-C4'pair"]) | ||
2865 | + tWS=df[df['type LW']=='tWS'] | ||
2866 | + tWS_dist=list(tWS["Distance"]) | ||
2867 | + tWS_angle_1=list(tWS["C4'-C1'-B1"]) | ||
2868 | + tWS_angle_2=list(tWS["C1'-B1-B1pair"]) | ||
2869 | + tWS_angle_3=list(tWS["B1-B1pair-C1'pair"]) | ||
2870 | + tWS_angle_4=list(tWS["B1pair-C1'pair-C4'pair"]) | ||
2871 | + cSW=df[df['type LW']=='cSW'] | ||
2872 | + cSW_dist=list(cSW["Distance"]) | ||
2873 | + cSW_angle_1=list(cSW["C4'-C1'-B1"]) | ||
2874 | + cSW_angle_2=list(cSW["C1'-B1-B1pair"]) | ||
2875 | + cSW_angle_3=list(cSW["B1-B1pair-C1'pair"]) | ||
2876 | + cSW_angle_4=list(cSW["B1pair-C1'pair-C4'pair"]) | ||
2877 | + tSW=df[df['type LW']=='tSW'] | ||
2878 | + tSW_dist=list(tSW["Distance"]) | ||
2879 | + tSW_angle_1=list(tSW["C4'-C1'-B1"]) | ||
2880 | + tSW_angle_2=list(tSW["C1'-B1-B1pair"]) | ||
2881 | + tSW_angle_3=list(tSW["B1-B1pair-C1'pair"]) | ||
2882 | + tSW_angle_4=list(tSW["B1pair-C1'pair-C4'pair"]) | ||
2883 | + cHH=df[df['type LW']=='cHH'] | ||
2884 | + cHH_dist=list(cHH["Distance"]) | ||
2885 | + cHH_angle_1=list(cHH["C4'-C1'-B1"]) | ||
2886 | + cHH_angle_2=list(cHH["C1'-B1-B1pair"]) | ||
2887 | + cHH_angle_3=list(cHH["B1-B1pair-C1'pair"]) | ||
2888 | + cHH_angle_4=list(cHH["B1pair-C1'pair-C4'pair"]) | ||
2889 | + tHH=df[df['type LW']=='tHH'] | ||
2890 | + tHH_dist=list(tHH["Distance"]) | ||
2891 | + tHH_angle_1=list(tHH["C4'-C1'-B1"]) | ||
2892 | + tHH_angle_2=list(tHH["C1'-B1-B1pair"]) | ||
2893 | + tHH_angle_3=list(tHH["B1-B1pair-C1'pair"]) | ||
2894 | + tHH_angle_4=list(tHH["B1pair-C1'pair-C4'pair"]) | ||
2895 | + cSH=df[df['type LW']=='cSH'] | ||
2896 | + cSH_dist=list(cSH["Distance"]) | ||
2897 | + cSH_angle_1=list(cSH["C4'-C1'-B1"]) | ||
2898 | + cSH_angle_2=list(cSH["C1'-B1-B1pair"]) | ||
2899 | + cSH_angle_3=list(cSH["B1-B1pair-C1'pair"]) | ||
2900 | + cSH_angle_4=list(cSH["B1pair-C1'pair-C4'pair"]) | ||
2901 | + tSH=df[df['type LW']=='tSH'] | ||
2902 | + tSH_dist=list(tSH["Distance"]) | ||
2903 | + tSH_angle_1=list(tSH["C4'-C1'-B1"]) | ||
2904 | + tSH_angle_2=list(tSH["C1'-B1-B1pair"]) | ||
2905 | + tSH_angle_3=list(tSH["B1-B1pair-C1'pair"]) | ||
2906 | + tSH_angle_4=list(tSH["B1pair-C1'pair-C4'pair"]) | ||
2907 | + cHS=df[df['type LW']=='cHS'] | ||
2908 | + cHS_dist=list(cHS["Distance"]) | ||
2909 | + cHS_angle_1=list(cHS["C4'-C1'-B1"]) | ||
2910 | + cHS_angle_2=list(cHS["C1'-B1-B1pair"]) | ||
2911 | + cHS_angle_3=list(cHS["B1-B1pair-C1'pair"]) | ||
2912 | + cHS_angle_4=list(cHS["B1pair-C1'pair-C4'pair"]) | ||
2913 | + tHS=df[df['type LW']=='tHS'] | ||
2914 | + tHS_dist=list(tHS["Distance"]) | ||
2915 | + tHS_angle_1=list(tHS["C4'-C1'-B1"]) | ||
2916 | + tHS_angle_2=list(tHS["C1'-B1-B1pair"]) | ||
2917 | + tHS_angle_3=list(tHS["B1-B1pair-C1'pair"]) | ||
2918 | + tHS_angle_4=list(tHS["B1pair-C1'pair-C4'pair"]) | ||
2919 | + cSS=df[df['type LW']=='cSS'] | ||
2920 | + cSS_dist=list(cSS["Distance"]) | ||
2921 | + cSS_angle_1=list(cSS["C4'-C1'-B1"]) | ||
2922 | + cSS_angle_2=list(cSS["C1'-B1-B1pair"]) | ||
2923 | + cSS_angle_3=list(cSS["B1-B1pair-C1'pair"]) | ||
2924 | + cSS_angle_4=list(cSS["B1pair-C1'pair-C4'pair"]) | ||
2925 | + tSS=df[df['type LW']=='tSS'] | ||
2926 | + tSS_dist=list(tSS["Distance"]) | ||
2927 | + tSS_angle_1=list(tSS["C4'-C1'-B1"]) | ||
2928 | + tSS_angle_2=list(tSS["C1'-B1-B1pair"]) | ||
2929 | + tSS_angle_3=list(tSS["B1-B1pair-C1'pair"]) | ||
2930 | + tSS_angle_4=list(tSS["B1pair-C1'pair-C4'pair"]) | ||
2931 | + | ||
2932 | + os.makedirs(runDir + "/results/figures/basepairs/", exist_ok=True) | ||
2933 | + os.chdir(runDir + "/results/figures/basepairs/") | ||
2934 | + | ||
2935 | + graphe('cWW', cWW_angle_1, cWW_angle_2, cWW_angle_3, cWW_angle_4, cWW_dist, len(cWW)) | ||
2936 | + graphe('tWW', tWW_angle_1, tWW_angle_2, tWW_angle_3, tWW_angle_4, tWW_dist, len(tWW)) | ||
2937 | + graphe('cWH', cWH_angle_1, cWH_angle_2, cWH_angle_3, cWH_angle_4, cWH_dist, len(cWH)) | ||
2938 | + graphe('tWH', tWH_angle_1, tWH_angle_2, tWH_angle_3, tWH_angle_4, tWH_dist, len(tWH)) | ||
2939 | + graphe('cHW', cHW_angle_1, cHW_angle_2, cHW_angle_3, cHW_angle_4, cHW_dist, len(cHW)) | ||
2940 | + graphe('tHW', tHW_angle_1, tHW_angle_2, tHW_angle_3, tHW_angle_4, tHW_dist, len(tHW)) | ||
2941 | + graphe('tWS', tWS_angle_1, tWS_angle_2, tWS_angle_3, tWS_angle_4, tWS_dist, len(tWS)) | ||
2942 | + graphe('cWS', cWS_angle_1, cWS_angle_2, cWS_angle_3, cWS_angle_4, cWS_dist, len(cWS)) | ||
2943 | + graphe('tSW', tSW_angle_1, tSW_angle_2, tSW_angle_3, tSW_angle_4, tSW_dist, len(tSW)) | ||
2944 | + graphe('cSW', cSW_angle_1, cSW_angle_2, cSW_angle_3, cSW_angle_4, cSW_dist, len(cSW)) | ||
2945 | + graphe('cHH', cHH_angle_1, cHH_angle_2, cHH_angle_3, cHH_angle_4, cHH_dist, len(cHH)) | ||
2946 | + graphe('tHH', tHH_angle_1, tHH_angle_2, tHH_angle_3, tHH_angle_4, tHH_dist, len(tHH)) | ||
2947 | + graphe('cSH', cSH_angle_1, cSH_angle_2, cSH_angle_3, cSH_angle_4, cSH_dist, len(cSH)) | ||
2948 | + graphe('tSH', tSH_angle_1, tSH_angle_2, tSH_angle_3, tSH_angle_4, tSH_dist, len(tSH)) | ||
2949 | + graphe('cHS', cHS_angle_1, cHS_angle_2, cHS_angle_3, cHS_angle_4, cHS_dist, len(cHS)) | ||
2950 | + graphe('tHS', tHS_angle_1, tHS_angle_2, tHS_angle_3, tHS_angle_4, tHS_dist, len(tHS)) | ||
2951 | + graphe('cSS', cSS_angle_1, cSS_angle_2, cSS_angle_3, cSS_angle_4, cSS_dist, len(cSS)) | ||
2952 | + graphe('tSS', tSS_angle_1, tSS_angle_2, tSS_angle_3, tSS_angle_4, tSS_dist, len(tSS)) | ||
2953 | + | ||
2954 | + nc=len(cWW)+len(cHH)+len(cSS)+len(cWH)+len(cHW)+len(cWS)+len(cSW)+len(cHS)+len(cSH) | ||
2955 | + GMM_tot(cWW_dist, "cWW", nc, 'lightcoral') | ||
2956 | + GMM_tot(cHH_dist, "cHH", nc, 'lightseagreen') | ||
2957 | + GMM_tot(cSS_dist, "cSS", nc, 'black') | ||
2958 | + GMM_tot(cWH_dist, "cWH", nc, 'goldenrod') | ||
2959 | + GMM_tot(cHW_dist, "cHW", nc, 'olive') | ||
2960 | + GMM_tot(cWS_dist, "cWS", nc, 'steelblue') | ||
2961 | + GMM_tot(cSW_dist, "cSW", nc, 'silver') | ||
2962 | + GMM_tot(cHS_dist, "cHS", nc, 'deeppink') | ||
2963 | + GMM_tot(cSH_dist, "cSH", nc, 'navy') | ||
2964 | + plt.xlabel('Distance (Angström)') | ||
2965 | + plt.title("GMM des distances entre pointes des nucléotides pour les appariements cis ("+str(nc)+ " valeurs)", fontsize=9) | ||
2966 | + plt.savefig("GMM des distances entre pointes des nucléotides pour les appariements cis (" +str(nc)+ " valeurs).png") | ||
2967 | + plt.close() | ||
2664 | 2968 | ||
2969 | + nt=len(tWW)+len(tHH)+len(tSS)+len(tWH)+len(tHW)+len(tWS)+len(tSW)+len(tHS)+len(tSH) | ||
2970 | + GMM_tot(tWW_dist, "tWW", nt, 'sienna') | ||
2971 | + GMM_tot(tHH_dist, "tHH", nt, 'maroon') | ||
2972 | + GMM_tot(tSS_dist, "tSS", nt, 'orange') | ||
2973 | + GMM_tot(tWH_dist, "tWH", nt, 'mediumaquamarine') | ||
2974 | + GMM_tot(tHW_dist, "tHW", nt, 'tomato') | ||
2975 | + GMM_tot(tWS_dist, "tWS", nt, 'indigo') | ||
2976 | + GMM_tot(tSW_dist, "tSW", nt, 'orchid') | ||
2977 | + GMM_tot(tHS_dist, "tHS", nt, 'tan') | ||
2978 | + GMM_tot(tSH_dist, "tSH", nt, 'lime') | ||
2979 | + plt.xlabel('Distance (Angström)') | ||
2980 | + plt.title("GMM des distances entre pointes des nucléotides pour les appariements trans ("+str(nt)+ " valeurs)", fontsize=9) | ||
2981 | + plt.savefig("GMM des distances entre pointes des nucléotides pour les appariements trans (" +str(nt)+ " valeurs).png") | ||
2982 | + plt.close() | ||
2983 | + | ||
2665 | 2984 | ||
2666 | if __name__ == "__main__": | 2985 | if __name__ == "__main__": |
2667 | 2986 | ||
... | @@ -2793,18 +3112,28 @@ if __name__ == "__main__": | ... | @@ -2793,18 +3112,28 @@ if __name__ == "__main__": |
2793 | joblist.append(Job(function=dist_atoms, args=(f,))) | 3112 | joblist.append(Job(function=dist_atoms, args=(f,))) |
2794 | ''' | 3113 | ''' |
2795 | 3114 | ||
2796 | - #dist_atoms_hire_RNA(os.listdir(path_to_3D_data + "rna_only")[0]) | 3115 | + ld=os.listdir(path_to_3D_data +'datapoints')[:1000] |
2797 | - #concatenate('/results/distances/', os.listdir(runDir+'/results/distances/'), 'dist_atoms.csv') | 3116 | + if '4zdo_1_E' in ld : |
3117 | + ld.remove('4zdo_1_E')#cas particuliers | ||
3118 | + if '4zdp_1_E' in ld : | ||
3119 | + ld.remove('4zdp_1_E') | ||
3120 | + dictionnaire=dictio(ld) | ||
2798 | 3121 | ||
3122 | + for cle in dictionnaire.keys(): | ||
3123 | + joblist.append(Job(function=parse, args=(cle,))) | ||
2799 | 3124 | ||
2800 | - exit() | 3125 | + |
3126 | + #exit() | ||
3127 | + ''' | ||
2801 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] | 3128 | f_prec=os.listdir(path_to_3D_data + "rna_only")[0] |
3129 | + | ||
2802 | for f in os.listdir(path_to_3D_data + "rna_only")[:100]: | 3130 | for f in os.listdir(path_to_3D_data + "rna_only")[:100]: |
2803 | #joblist.append(Job(function=dist_atoms, args=(f,))) | 3131 | #joblist.append(Job(function=dist_atoms, args=(f,))) |
2804 | #joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) | 3132 | #joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) |
2805 | joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) | 3133 | joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) |
2806 | joblist.append(Job(function=angles_plans_hire_RNA, args=(f,))) | 3134 | joblist.append(Job(function=angles_plans_hire_RNA, args=(f,))) |
2807 | 3135 | ||
3136 | + ''' | ||
2808 | 3137 | ||
2809 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) | 3138 | p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) |
2810 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) | 3139 | pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) |
... | @@ -2836,7 +3165,8 @@ if __name__ == "__main__": | ... | @@ -2836,7 +3165,8 @@ if __name__ == "__main__": |
2836 | print() | 3165 | print() |
2837 | print() | 3166 | print() |
2838 | 3167 | ||
2839 | - | 3168 | + concatenate('/results/basepairs/', 'basepairs.csv') |
3169 | + graph_basepairs() | ||
2840 | # finish the work after the parallel portions | 3170 | # finish the work after the parallel portions |
2841 | ''' | 3171 | ''' |
2842 | per_chain_stats() | 3172 | per_chain_stats() | ... | ... |
-
Please register or login to post a comment