Aglaé TABOT

Added functions appariements, parse, graph_basepairs

...@@ -17,6 +17,7 @@ import scipy.cluster.hierarchy as sch ...@@ -17,6 +17,7 @@ import scipy.cluster.hierarchy as sch
17 import sklearn 17 import sklearn
18 import json 18 import json
19 import pickle 19 import pickle
20 +import Bio
20 from scipy.spatial.distance import squareform 21 from scipy.spatial.distance import squareform
21 from mpl_toolkits.mplot3d import axes3d 22 from mpl_toolkits.mplot3d import axes3d
22 from Bio import AlignIO, SeqIO 23 from Bio import AlignIO, SeqIO
...@@ -1639,7 +1640,7 @@ def dist_atoms_hire_RNA (f) : ...@@ -1639,7 +1640,7 @@ def dist_atoms_hire_RNA (f) :
1639 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} dist_atoms_hire_RNA({f})") 1640 setproctitle(f"RNANet statistics.py Worker {thr_idx+1} dist_atoms_hire_RNA({f})")
1640 1641
1641 parser=MMCIFParser() 1642 parser=MMCIFParser()
1642 - s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f)) 1643 + s = parser.get_structure(name, os.path.abspath(path_to_3D_data +"rna_only/" + f))
1643 chain = next(s[0].get_chains()) 1644 chain = next(s[0].get_chains())
1644 residues=list(chain.get_residues()) 1645 residues=list(chain.get_residues())
1645 pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} dist_atoms_hire_RNA", unit="residu", leave=False) 1646 pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} dist_atoms_hire_RNA", unit="residu", leave=False)
...@@ -1800,7 +1801,7 @@ def angles_torsion_hire_RNA(f): ...@@ -1800,7 +1801,7 @@ def angles_torsion_hire_RNA(f):
1800 os.makedirs(runDir+"/results/HiRE-RNA/torsions/", exist_ok=True) 1801 os.makedirs(runDir+"/results/HiRE-RNA/torsions/", exist_ok=True)
1801 1802
1802 parser=MMCIFParser() 1803 parser=MMCIFParser()
1803 - s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f)) 1804 + s = parser.get_structure(name, os.path.abspath(path_to_3D_data + "rna_only/" + f))
1804 chain = next(s[0].get_chains()) 1805 chain = next(s[0].get_chains())
1805 residues=list(chain.get_residues()) 1806 residues=list(chain.get_residues())
1806 pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False) 1807 pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False)
...@@ -1926,7 +1927,7 @@ def angles_plans_hire_RNA(f): ...@@ -1926,7 +1927,7 @@ def angles_plans_hire_RNA(f):
1926 os.makedirs(runDir+"/results/HiRE-RNA/angles/", exist_ok=True) 1927 os.makedirs(runDir+"/results/HiRE-RNA/angles/", exist_ok=True)
1927 1928
1928 parser=MMCIFParser() 1929 parser=MMCIFParser()
1929 - s = parser.get_structure(name, os.path.abspath("/home/data/RNA/3D/rna_only/" + f)) 1930 + s = parser.get_structure(name, os.path.abspath(path_to_3D_data + "rna_only/" + f))
1930 chain = next(s[0].get_chains()) 1931 chain = next(s[0].get_chains())
1931 residues=list(chain.get_residues()) 1932 residues=list(chain.get_residues())
1932 pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False) 1933 pbar = tqdm(total=len(residues), position=thr_idx+1, desc=f"Worker {thr_idx+1}: {f} angles_torsion_hire_RNA", unit="residu", leave=False)
...@@ -2045,6 +2046,7 @@ def GMM_histo(data, name_data, x, y, nb_fichiers) : ...@@ -2045,6 +2046,7 @@ def GMM_histo(data, name_data, x, y, nb_fichiers) :
2045 def GMM_histo_without_saving(data, name_data, x, y, nb_fichiers) : 2046 def GMM_histo_without_saving(data, name_data, x, y, nb_fichiers) :
2046 ''' 2047 '''
2047 Plot Gaussian-Mixture-Model on histograms 2048 Plot Gaussian-Mixture-Model on histograms
2049 + But without saving and close the figure
2048 ''' 2050 '''
2049 histogram(data, name_data, x, y, nb_fichiers)#plot the histogram 2051 histogram(data, name_data, x, y, nb_fichiers)#plot the histogram
2050 2052
...@@ -2564,6 +2566,7 @@ def dictio(ld): ...@@ -2564,6 +2566,7 @@ def dictio(ld):
2564 key = pdb identifier of the structure 2566 key = pdb identifier of the structure
2565 value = list of RNA chains 2567 value = list of RNA chains
2566 ''' 2568 '''
2569 + dictionnaire=dict()
2567 pdb=[] 2570 pdb=[]
2568 for f in ld: 2571 for f in ld:
2569 pdb_id=str.split(f, '_')[0] 2572 pdb_id=str.split(f, '_')[0]
...@@ -2648,20 +2651,336 @@ def graphe(type_LW, angle_1, angle_2, angle_3, angle_4, distance, nb): ...@@ -2648,20 +2651,336 @@ def graphe(type_LW, angle_1, angle_2, angle_3, angle_4, distance, nb):
2648 2651
2649 plt.subplot(2, 1, 1) 2652 plt.subplot(2, 1, 1)
2650 2653
2651 - GMM_tot(angle_1, "C4'-C1'-B1", nb, 'cyan' ) 2654 + if len(angle_1) > 0 :
2652 - GMM_tot(angle_2, "C1'-B1-B1pair", nb, 'magenta') 2655 + GMM_tot(angle_1, "C4'-C1'-B1", nb, 'cyan' )
2653 - GMM_tot(angle_3, "B1-B1pair-C1'pair", nb, "yellow") 2656 + if len(angle_2) > 0 :
2654 - GMM_tot(angle_4, "B1pair-C1'pair-C4'pair", nb, 'olive') 2657 + GMM_tot(angle_2, "C1'-B1-B1pair", nb, 'magenta')
2658 + if len(angle_3) > 0 :
2659 + GMM_tot(angle_3, "B1-B1pair-C1'pair", nb, "yellow")
2660 + if len(angle_4) > 0 :
2661 + GMM_tot(angle_4, "B1pair-C1'pair-C4'pair", nb, 'olive')
2655 plt.xlabel("Angle(degré)") 2662 plt.xlabel("Angle(degré)")
2656 plt.title("GMM des angles plans pour les appariements " +type_LW +" (" +str(nb)+ " valeurs)", fontsize=10) 2663 plt.title("GMM des angles plans pour les appariements " +type_LW +" (" +str(nb)+ " valeurs)", fontsize=10)
2657 2664
2658 plt.subplot(2, 1, 2) 2665 plt.subplot(2, 1, 2)
2659 - GMM_histo_without_saving(distance, "Distance pointes " + type_LW, "Distance (Angström)", "Densité", nb) 2666 + if len(distance)>0 :
2667 + GMM_histo_without_saving(distance, "Distance pointes " + type_LW, "Distance (Angström)", "Densité", nb)
2660 2668
2661 plt.savefig("Mesures appariements " +type_LW+ " (" +str(nb)+ " valeurs).png" ) 2669 plt.savefig("Mesures appariements " +type_LW+ " (" +str(nb)+ " valeurs).png" )
2662 plt.close() 2670 plt.close()
2663 2671
2672 +def appariements(chain, df):
2673 + '''
2674 + Cleanup of the dataset
2675 + measurements of distances and angles between paired nucleotides in the chain
2676 + '''
2677 + liste_dist=[]
2678 + warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
2679 +
2680 + pairs=df[['index_chain', 'old_nt_resnum', 'paired', 'pair_type_LW']] # columns we keep
2681 + for i in range(pairs.shape[0]): #we remove the lines where no pairing (NaN in paired)
2682 + index_with_nan=pairs.index[pairs.iloc[:,2].isnull()]
2683 + pairs.drop(index_with_nan, 0, inplace=True)
2684 +
2685 + paired_int=[]
2686 + for i in pairs.index:# convert values ​​from paired to integers or lists of integers
2687 + paired=pairs.at[i, 'paired']
2688 + if type(paired) is np.int64 or type(paired) is np.float64:
2689 + paired_int.append(int(paired))
2690 + else : #strings
2691 + if len(paired)<3 : #a single pairing
2692 + paired_int.append(int(paired))
2693 + else : #several pairings
2694 + paired=paired.split(',')
2695 + l=[int(i) for i in paired]
2696 + paired_int.append(l)
2697 +
2698 + pair_type_LW_bis=[]
2699 + for j in pairs.index:
2700 + pair_type_LW = pairs.at[j, 'pair_type_LW']
2701 + if len(pair_type_LW)<4 : #a single pairing
2702 + pair_type_LW_bis.append(pair_type_LW)
2703 + else : #several pairings
2704 + pair_type_LW=pair_type_LW.split(',')
2705 + l=[i for i in pair_type_LW]
2706 + pair_type_LW_bis.append(pair_type_LW)
2707 +
2708 + #addition of these new columns
2709 + pairs.insert(4, "paired_int", paired_int, True)
2710 + pairs.insert(5, "pair_type_LW_bis", pair_type_LW_bis, True)
2711 +
2712 + indexNames=pairs[pairs['paired_int'] == 0].index
2713 + pairs.drop(indexNames, inplace=True)#deletion of lines with a 0 in paired_int (matching to another RNA chain)
2714 +
2715 + for i in pairs.index:
2716 + '''
2717 + calculations for each row of the pairs dataset
2718 + '''
2719 + resseq=pairs.at[i, 'old_nt_resnum'] #number of the residue in the chain
2720 + #code to delete letters in old_nt_resnum
2721 + icode_res=' '
2722 + if type(resseq) is str:
2723 + if resseq[0] != '-' :
2724 + while resseq.isdigit() is False:
2725 + l=len(resseq)
2726 + if icode_res==' ':
2727 + icode_res=resseq[l-1]
2728 + else :
2729 + icode_res=resseq[l-1]+icode_res
2730 + resseq=resseq[:l-1]
2731 + resseq=int(resseq)
2732 + index=pairs.at[i, 'index_chain']
2733 + type_LW=pairs.at[i, 'pair_type_LW_bis'] #pairing type
2734 + num_paired=pairs.at[i, 'paired_int'] #number (index_chain) of the paired nucleotide
2735 +
2736 +
2737 + if type(num_paired) is int or type(num_paired) is np.int64:
2738 + l=pairs[pairs['index_chain']==num_paired].index.to_list()
2739 +
2740 + resnum=pairs.at[l[0], 'old_nt_resnum']
2741 + icode_pair=' '
2742 + if type(resnum) is str:
2743 + if resnum[0] != '-' :
2744 + while resnum.isdigit() is False:
2745 + l=len(resnum)
2746 + if icode_pair==' ':
2747 + icode_pair=resnum[l-1]
2748 + else :
2749 + icode_pair=resnum[l-1]+icode_pair
2750 + resnum=resnum[:l-1]
2751 +
2752 + resnum=int(resnum)
2753 + try :
2754 + d=dist_pointes(chain[(' ',resseq, icode_res)], chain[(' ', resnum, icode_pair)]) #calculation of the distance between the tips of the paired nucleotides
2755 + angle=angle_c1_b1(chain[(' ', resseq, icode_res)], chain[(' ', resnum, icode_pair)])
2756 + if d != 0.0:
2757 + liste_dist.append([chain, type_LW, resseq, resnum, d, angle[0], angle[1], angle[2], angle[3]])
2758 + except :
2759 + pass
2760 + else :
2761 + for j in range(len(num_paired)): #if several pairings, process them one by one
2762 + if num_paired[j] != 0 :
2763 + l=pairs[pairs['index_chain']==num_paired[j]].index.to_list()
2764 +
2765 + resnum=pairs.at[l[0], 'old_nt_resnum']
2766 +
2767 + icode_pair=' '
2768 + if type(resnum) is str:
2769 + if resnum[0] != '-' :
2770 + while resnum.isdigit() is False:
2771 + l=len(resnum)
2772 + if icode_pair==' ':
2773 + icode_pair=resnum[l-1]
2774 + else :
2775 + icode_pair=resnum[l-1]+icode_pair
2776 + resnum=resnum[:l-1]
2777 + resnum=int(resnum)
2778 + try :
2779 + d=dist_pointes(chain[(' ', resseq, icode_res)], chain[(' ', resnum, icode_pair)])
2780 + angle=angle_c1_b1(chain[(' ', resseq, icode_res)], chain[(' ', resnum, icode_pair)])
2781 + if d != 0.0:
2782 + liste_dist.append([chain, type_LW[j], resseq, resnum, d, angle[0], angle[1], angle[2], angle[3]])
2783 + except:
2784 + pass
2785 +
2786 + return(liste_dist)
2787 +
2788 +def parse(cle):
2789 + l=[]
2790 + os.makedirs(runDir + "/results/basepairs/", exist_ok=True)
2791 + with warnings.catch_warnings():
2792 + # Ignore the PDB problems. This mostly warns that some chain is discontinuous.
2793 + warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.PDBConstructionWarning)
2794 + warnings.simplefilter('ignore', Bio.PDB.PDBExceptions.BiopythonWarning)
2795 + parser=MMCIFParser()
2796 + s = parser.get_structure(cle, os.path.abspath(path_to_3D_data + "RNAcifs/" + cle +".cif"))
2797 + for model in s:
2798 + for valeur in dictionnaire[cle]:
2799 + if len(valeur)>2:
2800 + df_tot=[]
2801 + for id_chain in valeur:
2802 + for data in ld:
2803 + if (len(data)<10): #unmapped
2804 + chaine=str.split(data, '_')
2805 + if chaine[0]==cle and chaine[2]==id_chain:
2806 + df=pd.read_csv(os.path.abspath(path_to_3D_data +"datapoints/" + data))
2807 + if df['index_chain'][0]==1:#ignore files with numbering errors
2808 + l=appariements(model[id_chain], df)
2809 + else :
2810 + for data in ld:
2811 + if (len(data)<10): #unmapped
2812 + chaine=str.split(data, '_')
2813 + if chaine[0]==cle and chaine[2]==valeur:
2814 + df=pd.read_csv(os.path.abspath(path_to_3D_data + "datapoints/" + data))
2815 + if df['index_chain'][0]==1:
2816 + l=appariements(model[valeur], df)
2817 + df_calc=pd.DataFrame(l, columns=["Chaine", "type LW", "Resseq", "Num paired", "Distance", "C4'-C1'-B1", "C1'-B1-B1pair", "B1-B1pair-C1'pair", "B1pair-C1'pair-C4'pair"])
2818 + df_calc.to_csv(runDir + "/results/basepairs/"+'basepairs '+cle+'_'+valeur+'.csv')
2819 +
2820 +def graph_basepairs():
2821 + df=pd.read_csv(os.path.abspath(runDir + "/results/basepairs/basepairs.csv"))
2822 +
2823 + cWW=df[df['type LW']=='cWW']
2824 + cWW_dist=list(cWW["Distance"])
2825 + cWW_angle_1=list(cWW["C4'-C1'-B1"])
2826 + cWW_angle_2=list(cWW["C1'-B1-B1pair"])
2827 + cWW_angle_3=list(cWW["B1-B1pair-C1'pair"])
2828 + cWW_angle_4=list(cWW["B1pair-C1'pair-C4'pair"])
2829 + tWW=df[df['type LW']=='tWW']
2830 + tWW_dist=list(tWW["Distance"])
2831 + tWW_angle_1=list(tWW["C4'-C1'-B1"])
2832 + tWW_angle_2=list(tWW["C1'-B1-B1pair"])
2833 + tWW_angle_3=list(tWW["B1-B1pair-C1'pair"])
2834 + tWW_angle_4=list(tWW["B1pair-C1'pair-C4'pair"])
2835 + cWH=df[df['type LW']=='cWH']
2836 + cWH_dist=list(cWH["Distance"])
2837 + cWH_angle_1=list(cWH["C4'-C1'-B1"])
2838 + cWH_angle_2=list(cWH["C1'-B1-B1pair"])
2839 + cWH_angle_3=list(cWH["B1-B1pair-C1'pair"])
2840 + cWH_angle_4=list(cWH["B1pair-C1'pair-C4'pair"])
2841 + tWH=df[df['type LW']=='tWH']
2842 + tWH_dist=list(tWH["Distance"])
2843 + tWH_angle_1=list(tWH["C4'-C1'-B1"])
2844 + tWH_angle_2=list(tWH["C1'-B1-B1pair"])
2845 + tWH_angle_3=list(tWH["B1-B1pair-C1'pair"])
2846 + tWH_angle_4=list(tWH["B1pair-C1'pair-C4'pair"])
2847 + cHW=df[df['type LW']=='cHW']
2848 + cHW_dist=list(cHW["Distance"])
2849 + cHW_angle_1=list(cHW["C4'-C1'-B1"])
2850 + cHW_angle_2=list(cHW["C1'-B1-B1pair"])
2851 + cHW_angle_3=list(cHW["B1-B1pair-C1'pair"])
2852 + cHW_angle_4=list(cHW["B1pair-C1'pair-C4'pair"])
2853 + tHW=df[df['type LW']=='tHW']
2854 + tHW_dist=list(tHW["Distance"])
2855 + tHW_angle_1=list(tHW["C4'-C1'-B1"])
2856 + tHW_angle_2=list(tHW["C1'-B1-B1pair"])
2857 + tHW_angle_3=list(tHW["B1-B1pair-C1'pair"])
2858 + tHW_angle_4=list(tHW["B1pair-C1'pair-C4'pair"])
2859 + cWS=df[df['type LW']=='cWS']
2860 + cWS_dist=list(cWS["Distance"])
2861 + cWS_angle_1=list(cWS["C4'-C1'-B1"])
2862 + cWS_angle_2=list(cWS["C1'-B1-B1pair"])
2863 + cWS_angle_3=list(cWS["B1-B1pair-C1'pair"])
2864 + cWS_angle_4=list(cWS["B1pair-C1'pair-C4'pair"])
2865 + tWS=df[df['type LW']=='tWS']
2866 + tWS_dist=list(tWS["Distance"])
2867 + tWS_angle_1=list(tWS["C4'-C1'-B1"])
2868 + tWS_angle_2=list(tWS["C1'-B1-B1pair"])
2869 + tWS_angle_3=list(tWS["B1-B1pair-C1'pair"])
2870 + tWS_angle_4=list(tWS["B1pair-C1'pair-C4'pair"])
2871 + cSW=df[df['type LW']=='cSW']
2872 + cSW_dist=list(cSW["Distance"])
2873 + cSW_angle_1=list(cSW["C4'-C1'-B1"])
2874 + cSW_angle_2=list(cSW["C1'-B1-B1pair"])
2875 + cSW_angle_3=list(cSW["B1-B1pair-C1'pair"])
2876 + cSW_angle_4=list(cSW["B1pair-C1'pair-C4'pair"])
2877 + tSW=df[df['type LW']=='tSW']
2878 + tSW_dist=list(tSW["Distance"])
2879 + tSW_angle_1=list(tSW["C4'-C1'-B1"])
2880 + tSW_angle_2=list(tSW["C1'-B1-B1pair"])
2881 + tSW_angle_3=list(tSW["B1-B1pair-C1'pair"])
2882 + tSW_angle_4=list(tSW["B1pair-C1'pair-C4'pair"])
2883 + cHH=df[df['type LW']=='cHH']
2884 + cHH_dist=list(cHH["Distance"])
2885 + cHH_angle_1=list(cHH["C4'-C1'-B1"])
2886 + cHH_angle_2=list(cHH["C1'-B1-B1pair"])
2887 + cHH_angle_3=list(cHH["B1-B1pair-C1'pair"])
2888 + cHH_angle_4=list(cHH["B1pair-C1'pair-C4'pair"])
2889 + tHH=df[df['type LW']=='tHH']
2890 + tHH_dist=list(tHH["Distance"])
2891 + tHH_angle_1=list(tHH["C4'-C1'-B1"])
2892 + tHH_angle_2=list(tHH["C1'-B1-B1pair"])
2893 + tHH_angle_3=list(tHH["B1-B1pair-C1'pair"])
2894 + tHH_angle_4=list(tHH["B1pair-C1'pair-C4'pair"])
2895 + cSH=df[df['type LW']=='cSH']
2896 + cSH_dist=list(cSH["Distance"])
2897 + cSH_angle_1=list(cSH["C4'-C1'-B1"])
2898 + cSH_angle_2=list(cSH["C1'-B1-B1pair"])
2899 + cSH_angle_3=list(cSH["B1-B1pair-C1'pair"])
2900 + cSH_angle_4=list(cSH["B1pair-C1'pair-C4'pair"])
2901 + tSH=df[df['type LW']=='tSH']
2902 + tSH_dist=list(tSH["Distance"])
2903 + tSH_angle_1=list(tSH["C4'-C1'-B1"])
2904 + tSH_angle_2=list(tSH["C1'-B1-B1pair"])
2905 + tSH_angle_3=list(tSH["B1-B1pair-C1'pair"])
2906 + tSH_angle_4=list(tSH["B1pair-C1'pair-C4'pair"])
2907 + cHS=df[df['type LW']=='cHS']
2908 + cHS_dist=list(cHS["Distance"])
2909 + cHS_angle_1=list(cHS["C4'-C1'-B1"])
2910 + cHS_angle_2=list(cHS["C1'-B1-B1pair"])
2911 + cHS_angle_3=list(cHS["B1-B1pair-C1'pair"])
2912 + cHS_angle_4=list(cHS["B1pair-C1'pair-C4'pair"])
2913 + tHS=df[df['type LW']=='tHS']
2914 + tHS_dist=list(tHS["Distance"])
2915 + tHS_angle_1=list(tHS["C4'-C1'-B1"])
2916 + tHS_angle_2=list(tHS["C1'-B1-B1pair"])
2917 + tHS_angle_3=list(tHS["B1-B1pair-C1'pair"])
2918 + tHS_angle_4=list(tHS["B1pair-C1'pair-C4'pair"])
2919 + cSS=df[df['type LW']=='cSS']
2920 + cSS_dist=list(cSS["Distance"])
2921 + cSS_angle_1=list(cSS["C4'-C1'-B1"])
2922 + cSS_angle_2=list(cSS["C1'-B1-B1pair"])
2923 + cSS_angle_3=list(cSS["B1-B1pair-C1'pair"])
2924 + cSS_angle_4=list(cSS["B1pair-C1'pair-C4'pair"])
2925 + tSS=df[df['type LW']=='tSS']
2926 + tSS_dist=list(tSS["Distance"])
2927 + tSS_angle_1=list(tSS["C4'-C1'-B1"])
2928 + tSS_angle_2=list(tSS["C1'-B1-B1pair"])
2929 + tSS_angle_3=list(tSS["B1-B1pair-C1'pair"])
2930 + tSS_angle_4=list(tSS["B1pair-C1'pair-C4'pair"])
2931 +
2932 + os.makedirs(runDir + "/results/figures/basepairs/", exist_ok=True)
2933 + os.chdir(runDir + "/results/figures/basepairs/")
2934 +
2935 + graphe('cWW', cWW_angle_1, cWW_angle_2, cWW_angle_3, cWW_angle_4, cWW_dist, len(cWW))
2936 + graphe('tWW', tWW_angle_1, tWW_angle_2, tWW_angle_3, tWW_angle_4, tWW_dist, len(tWW))
2937 + graphe('cWH', cWH_angle_1, cWH_angle_2, cWH_angle_3, cWH_angle_4, cWH_dist, len(cWH))
2938 + graphe('tWH', tWH_angle_1, tWH_angle_2, tWH_angle_3, tWH_angle_4, tWH_dist, len(tWH))
2939 + graphe('cHW', cHW_angle_1, cHW_angle_2, cHW_angle_3, cHW_angle_4, cHW_dist, len(cHW))
2940 + graphe('tHW', tHW_angle_1, tHW_angle_2, tHW_angle_3, tHW_angle_4, tHW_dist, len(tHW))
2941 + graphe('tWS', tWS_angle_1, tWS_angle_2, tWS_angle_3, tWS_angle_4, tWS_dist, len(tWS))
2942 + graphe('cWS', cWS_angle_1, cWS_angle_2, cWS_angle_3, cWS_angle_4, cWS_dist, len(cWS))
2943 + graphe('tSW', tSW_angle_1, tSW_angle_2, tSW_angle_3, tSW_angle_4, tSW_dist, len(tSW))
2944 + graphe('cSW', cSW_angle_1, cSW_angle_2, cSW_angle_3, cSW_angle_4, cSW_dist, len(cSW))
2945 + graphe('cHH', cHH_angle_1, cHH_angle_2, cHH_angle_3, cHH_angle_4, cHH_dist, len(cHH))
2946 + graphe('tHH', tHH_angle_1, tHH_angle_2, tHH_angle_3, tHH_angle_4, tHH_dist, len(tHH))
2947 + graphe('cSH', cSH_angle_1, cSH_angle_2, cSH_angle_3, cSH_angle_4, cSH_dist, len(cSH))
2948 + graphe('tSH', tSH_angle_1, tSH_angle_2, tSH_angle_3, tSH_angle_4, tSH_dist, len(tSH))
2949 + graphe('cHS', cHS_angle_1, cHS_angle_2, cHS_angle_3, cHS_angle_4, cHS_dist, len(cHS))
2950 + graphe('tHS', tHS_angle_1, tHS_angle_2, tHS_angle_3, tHS_angle_4, tHS_dist, len(tHS))
2951 + graphe('cSS', cSS_angle_1, cSS_angle_2, cSS_angle_3, cSS_angle_4, cSS_dist, len(cSS))
2952 + graphe('tSS', tSS_angle_1, tSS_angle_2, tSS_angle_3, tSS_angle_4, tSS_dist, len(tSS))
2953 +
2954 + nc=len(cWW)+len(cHH)+len(cSS)+len(cWH)+len(cHW)+len(cWS)+len(cSW)+len(cHS)+len(cSH)
2955 + GMM_tot(cWW_dist, "cWW", nc, 'lightcoral')
2956 + GMM_tot(cHH_dist, "cHH", nc, 'lightseagreen')
2957 + GMM_tot(cSS_dist, "cSS", nc, 'black')
2958 + GMM_tot(cWH_dist, "cWH", nc, 'goldenrod')
2959 + GMM_tot(cHW_dist, "cHW", nc, 'olive')
2960 + GMM_tot(cWS_dist, "cWS", nc, 'steelblue')
2961 + GMM_tot(cSW_dist, "cSW", nc, 'silver')
2962 + GMM_tot(cHS_dist, "cHS", nc, 'deeppink')
2963 + GMM_tot(cSH_dist, "cSH", nc, 'navy')
2964 + plt.xlabel('Distance (Angström)')
2965 + plt.title("GMM des distances entre pointes des nucléotides pour les appariements cis ("+str(nc)+ " valeurs)", fontsize=9)
2966 + plt.savefig("GMM des distances entre pointes des nucléotides pour les appariements cis (" +str(nc)+ " valeurs).png")
2967 + plt.close()
2664 2968
2969 + nt=len(tWW)+len(tHH)+len(tSS)+len(tWH)+len(tHW)+len(tWS)+len(tSW)+len(tHS)+len(tSH)
2970 + GMM_tot(tWW_dist, "tWW", nt, 'sienna')
2971 + GMM_tot(tHH_dist, "tHH", nt, 'maroon')
2972 + GMM_tot(tSS_dist, "tSS", nt, 'orange')
2973 + GMM_tot(tWH_dist, "tWH", nt, 'mediumaquamarine')
2974 + GMM_tot(tHW_dist, "tHW", nt, 'tomato')
2975 + GMM_tot(tWS_dist, "tWS", nt, 'indigo')
2976 + GMM_tot(tSW_dist, "tSW", nt, 'orchid')
2977 + GMM_tot(tHS_dist, "tHS", nt, 'tan')
2978 + GMM_tot(tSH_dist, "tSH", nt, 'lime')
2979 + plt.xlabel('Distance (Angström)')
2980 + plt.title("GMM des distances entre pointes des nucléotides pour les appariements trans ("+str(nt)+ " valeurs)", fontsize=9)
2981 + plt.savefig("GMM des distances entre pointes des nucléotides pour les appariements trans (" +str(nt)+ " valeurs).png")
2982 + plt.close()
2983 +
2665 2984
2666 if __name__ == "__main__": 2985 if __name__ == "__main__":
2667 2986
...@@ -2793,18 +3112,28 @@ if __name__ == "__main__": ...@@ -2793,18 +3112,28 @@ if __name__ == "__main__":
2793 joblist.append(Job(function=dist_atoms, args=(f,))) 3112 joblist.append(Job(function=dist_atoms, args=(f,)))
2794 ''' 3113 '''
2795 3114
2796 - #dist_atoms_hire_RNA(os.listdir(path_to_3D_data + "rna_only")[0]) 3115 + ld=os.listdir(path_to_3D_data +'datapoints')[:1000]
2797 - #concatenate('/results/distances/', os.listdir(runDir+'/results/distances/'), 'dist_atoms.csv') 3116 + if '4zdo_1_E' in ld :
3117 + ld.remove('4zdo_1_E')#cas particuliers
3118 + if '4zdp_1_E' in ld :
3119 + ld.remove('4zdp_1_E')
3120 + dictionnaire=dictio(ld)
2798 3121
3122 + for cle in dictionnaire.keys():
3123 + joblist.append(Job(function=parse, args=(cle,)))
2799 3124
2800 - exit() 3125 +
3126 + #exit()
3127 + '''
2801 f_prec=os.listdir(path_to_3D_data + "rna_only")[0] 3128 f_prec=os.listdir(path_to_3D_data + "rna_only")[0]
3129 +
2802 for f in os.listdir(path_to_3D_data + "rna_only")[:100]: 3130 for f in os.listdir(path_to_3D_data + "rna_only")[:100]:
2803 #joblist.append(Job(function=dist_atoms, args=(f,))) 3131 #joblist.append(Job(function=dist_atoms, args=(f,)))
2804 #joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,))) 3132 #joblist.append(Job(function=dist_atoms_hire_RNA, args=(f,)))
2805 joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,))) 3133 joblist.append(Job(function=angles_torsion_hire_RNA, args=(f,)))
2806 joblist.append(Job(function=angles_plans_hire_RNA, args=(f,))) 3134 joblist.append(Job(function=angles_plans_hire_RNA, args=(f,)))
2807 3135
3136 + '''
2808 3137
2809 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers) 3138 p = Pool(initializer=init_worker, initargs=(tqdm.get_lock(),), processes=nworkers)
2810 pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True) 3139 pbar = tqdm(total=len(joblist), desc="Stat jobs", position=0, unit="job", leave=True)
...@@ -2836,7 +3165,8 @@ if __name__ == "__main__": ...@@ -2836,7 +3165,8 @@ if __name__ == "__main__":
2836 print() 3165 print()
2837 print() 3166 print()
2838 3167
2839 - 3168 + concatenate('/results/basepairs/', 'basepairs.csv')
3169 + graph_basepairs()
2840 # finish the work after the parallel portions 3170 # finish the work after the parallel portions
2841 ''' 3171 '''
2842 per_chain_stats() 3172 per_chain_stats()
......