Louis BECQUEY

Created folders scripts/ and figures/

1 ->__'ZDFS33 : 0-150'
2 -UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCC
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 -100 first nucleotides : 8 solutions in 18.908212661743164 seconds, using 622568 kb of RAM
2 -Traceback (most recent call last):
3 - File "benchmark_longueur.py", line 23, in <module>
4 - output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
5 - File "/usr/local/lib/python3.8/subprocess.py", line 411, in check_output
6 - return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
7 - File "/usr/local/lib/python3.8/subprocess.py", line 512, in run
8 - raise CalledProcessError(retcode, process.args,
9 -subprocess.CalledProcessError: Command '['./bin/biorseo', '-d', './data/modules/DESC', '-s', './ZDFS33.fa', '-v']' died with <Signals.SIGKILL: 9>.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
...@@ -3,8 +3,11 @@ import subprocess ...@@ -3,8 +3,11 @@ import subprocess
3 import time 3 import time
4 import resource 4 import resource
5 5
6 +# take an RNA sequence and cut it from 100 bases to its actual length
7 +# then measure computation time, peak memory, and number of solutions for each length
6 8
7 - 9 +# This RNA is actually a 16S rRNA from PDB 1J5E.
10 +# http://ndbserver.rutgers.edu/service/ndb/atlas/summary
8 seq = "UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU" 11 seq = 
"UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU"
9 12
10 step = 100 13 step = 100
...@@ -13,12 +16,13 @@ n = len(seq) ...@@ -13,12 +16,13 @@ n = len(seq)
13 while step < len(seq)+50: 16 while step < len(seq)+50:
14 sub_seq = seq[0:(min(step,n))] 17 sub_seq = seq[0:(min(step,n))]
15 18
16 - fasta = open("ZDFS33.fa", 'w') 19 + # write the sequence to file
20 + fasta = open("data/fasta/ZDFS33.fa", 'w')
17 fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq) 21 fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq)
18 fasta.close() 22 fasta.close()
19 23
24 + # run biorseo on it, with default options
20 cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", "./ZDFS33.fa", "-v"] 25 cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", "./ZDFS33.fa", "-v"]
21 -
22 old_time = time.time() 26 old_time = time.time()
23 output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:] 27 output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
24 run_time = time.time() - old_time 28 run_time = time.time() - old_time
......
...@@ -6,6 +6,7 @@ echo "- CPLEX academic version: cplex_installer_12.8_Student.bin"; ...@@ -6,6 +6,7 @@ echo "- CPLEX academic version: cplex_installer_12.8_Student.bin";
6 echo "- Nupack header files: nupack_3.2.2.tar.gz"; 6 echo "- Nupack header files: nupack_3.2.2.tar.gz";
7 exit 0; 7 exit 0;
8 8
9 +cd ../
9 THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 10 THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
10 11
11 ####################################################### Dependencies ############################################################## 12 ####################################################### Dependencies ##############################################################
...@@ -14,7 +15,7 @@ sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 1 ...@@ -14,7 +15,7 @@ sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 1
14 sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100 15 sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100
15 16
16 # CPLEX: only to build biorseo 17 # CPLEX: only to build biorseo
17 -# HERE YOU SHOULD GET YOU ROWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore. 18 +# HERE YOU SHOULD GET YOUR OWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
18 chmod +x cplex_installer_12.8_Student.bin 19 chmod +x cplex_installer_12.8_Student.bin
19 printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin 20 printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin
20 rm cplex_installer_12.8_Student.bin 21 rm cplex_installer_12.8_Student.bin
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
2 #!/bin/bash 2 #!/bin/bash
3 ######################################################## RNA modules ############################################################## 3 ######################################################## RNA modules ##############################################################
4 4
5 +cd ../
6 +
5 # Rna3Dmotifs data 7 # Rna3Dmotifs data
6 mkdir -p data/modules/DESC 8 mkdir -p data/modules/DESC
7 wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz 9 wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz
...@@ -26,6 +28,7 @@ sudo -H pip3 install networkx numpy regex wrapt biopython ...@@ -26,6 +28,7 @@ sudo -H pip3 install networkx numpy regex wrapt biopython
26 git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing 28 git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
27 cd BayesPairing 29 cd BayesPairing
28 sudo -H pip3 install . 30 sudo -H pip3 install .
31 +
29 # Train Bayes Pairing (it has been installed on the image and the source has been deleted, we train the models now, and will remount it as volume at run time) 32 # Train Bayes Pairing (it has been installed on the image and the source has been deleted, we train the models now, and will remount it as volume at run time)
30 cd bayespairing/src 33 cd bayespairing/src
31 python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............." 34 python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
......
...@@ -421,10 +421,7 @@ if extension == "all": ...@@ -421,10 +421,7 @@ if extension == "all":
421 for a in ax: 421 for a in ax:
422 a.label_outer() 422 a.label_outer()
423 plt.subplots_adjust(bottom=0.2, top=0.9, left=0.07, right=0.98, hspace=0.05, wspace = 0.05) 423 plt.subplots_adjust(bottom=0.2, top=0.9, left=0.07, right=0.98, hspace=0.05, wspace = 0.05)
424 - plt.savefig("pareto_visualizerD.png") 424 + plt.savefig("pareto_visualizerD.png")
425 -
426 -
427 -
428 else: 425 else:
429 fig, ax = plt.subplots(2,1, figsize=(6,5)) 426 fig, ax = plt.subplots(2,1, figsize=(6,5))
430 plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4) 427 plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4)
......
1 -import os 1 +#!/usr/bin/python3
2 2
3 +# This script's purpose is to extract information about the CaRNAval
4 +# RINs from a Python pickle object containing RINs from their RIN.py class.
5 +# We do this because the official JSON file is hard to understand, and Antoine Soulé
6 +# recommended the pickle.
3 7
8 +import networkx, os, pickle, sys
4 9
5 if __name__=="__main__": 10 if __name__=="__main__":
6 11
7 - ##nxpickled import 12 +
8 - dir = os.getcwd() + "/data/modules/CaRNAval/" 13 + rin_DIR = os.getcwd() + "/../data/modules/CaRNAval/"
14 + filename = "CaRNAval_1_as_dictionnary.nxpickled"
9 15
16 + # Check that we can find CaRNAval RINs, and load the dataset
10 try: 17 try:
11 - import sys 18 + sys.path.append(os.path.abspath(rin_DIR))
12 - sys.path.append(os.path.abspath(dir))
13 import RIN 19 import RIN
14 -
15 except: 20 except:
16 - print("File not found : " + dir + "RIN.py") 21 + print("File not found:" + rin_DIR + "RIN.py")
17 - 22 + exit(1)
18 - else:
19 - filename = "CaRNAval_1_as_dictionnary.nxpickled"
20 -
21 - try:
22 - import networkx
23 - import pickle
24 -
25 - objects = []
26 -
27 - with (open(dir+filename, "rb")) as openfile:
28 - while True:
29 - try:
30 - objects.append(pickle.load(openfile))
31 - except EOFError:
32 - break
33 -
34 - print("Dataset loaded")
35 -
36 - except OSError:
37 - print("File not found : " + dir + filename)
38 23
39 - else: 24 + try:
40 - 25 + objects = []
41 - 26 + with (open(rin_DIR+filename, "rb")) as openfile:
42 - ##Creation of a file for each RIN 27 + while True:
43 - try: 28 + try:
44 - os.mkdir(dir + "Subfiles") 29 + objects.append(pickle.load(openfile))
45 - except OSError: 30 + except EOFError:
46 - print ("Creation of the directory %s failed" % (dir + "Subfiles") + " : maybe it already exists ?") 31 + break
32 + print("Dataset loaded")
33 + except OSError:
34 + print("File not found : " + rin_DIR + filename)
35 + exit(1)
36 +
37 + # Creation of a directory to extract RINs from the pickle file to individual files
38 + try:
39 + os.makedirs(rin_DIR + "Subfiles", exist_ok=True)
40 + except OSError:
41 + print("Creation of the directory %s failed" % (rin_DIR + "Subfiles"))
42 + exit(1)
43 +
44 + # Loop on every CaRNAval module and extract it from the Python object to flat text file
45 + n_modules = len(objects[0]) # ? to
46 + for i in range(1,1+n_modules):
47 + motif = objects[0][i].graph
48 + f = open(rin_DIR + "Subfiles/" + str(i-1) + ".txt", "w+")
49 + f.write("ntA,ntB,long_range;...\n")
50 +
51 + components = []
52 + comp = []
53 + nodes = list(motif)
54 + nodes.sort()
55 + for node in nodes:
56 + if comp == []:
57 + comp.append(node)
47 else: 58 else:
48 - print ("Successfully created the directory %s " % (dir + "Subfiles")) 59 + if comp[-1] + 1 != node : #not the same component
49 - 60 + components.append(comp)
50 - header_link = "ntA,ntB,long_range;...\n" 61 + comp = []
51 - header_comp = "pos;k;seq\n" 62 + comp.append(node)
52 - 63 + else :
53 - for i in range(1,338): 64 + comp.append(node)
54 - motif = objects[0][i].graph 65 + components.append(comp)
55 - f = open( dir + "Subfiles/" + str(i-1) + ".txt" , "w+" ) 66 +
56 - f.write(header_link) 67 + #print(nodes)
57 - 68 +
58 - components = [] 69 + basepairs = ""
59 - comp = [] 70 + edges = list(motif.edges())
60 - nodes = list(motif) 71 + for a in edges:
61 - nodes.sort() 72 + if motif.edges[a]['label'] == 'CWW' :
62 - for node in nodes: 73 + ntA = nodes.index(a[0])
63 - if comp == []: 74 + ntB = nodes.index(a[1])
64 - comp.append(node) 75 +
65 - else: 76 + if ntA <= ntB :
66 - if comp[-1] + 1 != node : #not the same component 77 + basepairs += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";"
67 - components.append(comp) 78 +
68 - comp = [] 79 + f.write(basepairs + "\n")
69 - comp.append(node) 80 + f.write("pos;k;seq\n")
70 - else : 81 +
71 - comp.append(node) 82 + num_nt = -1
72 - 83 + for a in components:
73 - components.append(comp) 84 + seq = ""
74 - 85 + data_comp = str(num_nt+1)
75 - #print(nodes) 86 + for b in a:
76 - 87 + num_nt += 1
77 - liaisons = "" 88 +
78 - edges = list(motif.edges()) 89 + # sometimes in the nxpicled file, a node has the attribute "realnt",
79 - for a in edges: 90 + # and sometimes "real_nt", but it's the same thing
80 - if motif.edges[a]['label'] == 'CWW' : 91 + try:
81 - ntA = nodes.index(a[0]) 92 + seq += motif.nodes[b]["realnt"]
82 - ntB = nodes.index(a[1]) 93 + except:
83 - 94 + seq += motif.nodes[b]["real_nt"]
84 - if ntA <= ntB : 95 + data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
85 - liaisons += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";" 96 + f.write(data_comp)
86 - 97 +
87 - f.write(liaisons + "\n") 98 + f.close()
88 - f.write(header_comp) 99 + # print(str(i-1) + ".txt created")
89 - 100 +
90 - num_nt = -1 101 + print("Successfully parsed "+filename, ", now individual RINs are saved in Subfiles/ folder.", sep='')
91 - for a in components:
92 - seq = ""
93 - data_comp = str(num_nt+1)
94 - for b in a:
95 - num_nt += 1
96 -
97 - #sometimes in the nxpicled file, a node has the attribute "realnt", and sometimes "real_nt", but it's the same thing
98 - try:
99 - seq += motif.nodes[b]["realnt"]
100 -
101 - except:
102 - seq += motif.nodes[b]["real_nt"]
103 -
104 - data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
105 -
106 - f.write(data_comp)
107 -
108 -
109 - f.close()
110 -
111 - print(str(i-1) + ".txt created")
112 -
113 - print("Successfully parsed "+dir+filename)
114 102
......
1 ->_PKB15:_eggplant_mosaic_virus_(EMV)
2 -UGGGUGCGACUCCCCCCCCUCCCGUGGGUCAACGGGAACCA
3 -..[[.......]]....[[[((((((]]]...))))))... + rna3dmotif24 81.0000000 5.8860996
4 -..((((...)).......))((((((......))))))... 0.0000000 6.6571104
1 -__'CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE_'_(PDB_00376)
2 -GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
3 -.(((((((.((...)).))..(((.((((([[[....)))))..)))((((...]]].))))..))))).... + 2JYM.A.1 0.7737056 17.7058440
4 -.((((((((((...)))....(((.((((([[.....)))))..)))((((....]].))))))))))).... 0.0000000 19.1791150