Louis BECQUEY

Created folders scripts/ and figures/

>__'ZDFS33 : 0-150'
UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCC
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
100 first nucleotides : 8 solutions in 18.908212661743164 seconds, using 622568 kb of RAM
Traceback (most recent call last):
File "benchmark_longueur.py", line 23, in <module>
output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
File "/usr/local/lib/python3.8/subprocess.py", line 411, in check_output
return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
File "/usr/local/lib/python3.8/subprocess.py", line 512, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['./bin/biorseo', '-d', './data/modules/DESC', '-s', './ZDFS33.fa', '-v']' died with <Signals.SIGKILL: 9>.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
......@@ -3,8 +3,11 @@ import subprocess
import time
import resource
# take a RNA sequence and cut it from 100 bases to actual length
# then measure computation time, peak memory, and number of solutions for each length
# This RNA is actually a 16S rRNA from PDB 1J5E.
# http://ndbserver.rutgers.edu/service/ndb/atlas/summary
seq = "UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU"
step = 100
......@@ -13,12 +16,13 @@ n = len(seq)
while step < len(seq)+50:
sub_seq = seq[0:(min(step,n))]
fasta = open("ZDFS33.fa", 'w')
# write the sequence to file
fasta = open("data/fasta/ZDFS33.fa", 'w')
fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq)
fasta.close()
# run biorseo on it, with default options
cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", "./ZDFS33.fa", "-v"]
old_time = time.time()
output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
run_time = time.time() - old_time
......
......@@ -6,6 +6,7 @@ echo "- CPLEX academic version: cplex_installer_12.8_Student.bin";
echo "- Nupack header files: nupack_3.2.2.tar.gz";
exit 0;
cd ../
THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
####################################################### Dependencies ##############################################################
......@@ -14,7 +15,7 @@ sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 1
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100
# CPLEX: only to build biorseo
# HERE YOU SHOULD GET YOU ROWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
# HERE YOU SHOULD GET YOUR OWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
chmod +x cplex_installer_12.8_Student.bin
printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin
rm cplex_installer_12.8_Student.bin
......
......@@ -2,6 +2,8 @@
#!/bin/bash
######################################################## RNA modules ##############################################################
cd ../
# Rna3Dmotifs data
mkdir -p data/modules/DESC
wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz
......@@ -26,6 +28,7 @@ sudo -H pip3 install networkx numpy regex wrapt biopython
git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
cd BayesPairing
sudo -H pip3 install .
# Train Bayes Pairing (it has been installed on the image and the source has been deleted, we train the models now, and will remount it as volume at run time)
cd bayespairing/src
python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
......
......@@ -421,10 +421,7 @@ if extension == "all":
for a in ax:
a.label_outer()
plt.subplots_adjust(bottom=0.2, top=0.9, left=0.07, right=0.98, hspace=0.05, wspace = 0.05)
plt.savefig("pareto_visualizerD.png")
plt.savefig("pareto_visualizerD.png")
else:
fig, ax = plt.subplots(2,1, figsize=(6,5))
plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4)
......
import os
#!/usr/bin/python3
# This script's purpose is to extract information about the CaRNAval
# RINS from a Python pickle object containing RINs from their RIN.py class.
# We do this because the official JSON file is hard to understand, and Antoine Soulé
# recommended the pickle.
import networkx, os, pickle, sys
if __name__=="__main__":
##nxpickled import
dir = os.getcwd() + "/data/modules/CaRNAval/"
rin_DIR = os.getcwd() + "/../data/modules/CaRNAval/"
filename = "CaRNAval_1_as_dictionnary.nxpickled"
# Check that we can find CaRNAval RINs, and load the dataset
try:
import sys
sys.path.append(os.path.abspath(dir))
sys.path.append(os.path.abspath(rin_DIR))
import RIN
except:
print("File not found : " + dir + "RIN.py")
else:
filename = "CaRNAval_1_as_dictionnary.nxpickled"
try:
import networkx
import pickle
objects = []
with (open(dir+filename, "rb")) as openfile:
while True:
try:
objects.append(pickle.load(openfile))
except EOFError:
break
print("Dataset loaded")
except OSError:
print("File not found : " + dir + filename)
print("File not found:" + rin_DIR + "RIN.py")
exit(1)
else:
##Creation of a file for each RIN
try:
os.mkdir(dir + "Subfiles")
except OSError:
print ("Creation of the directory %s failed" % (dir + "Subfiles") + " : maybe it already exists ?")
try:
objects = []
with (open(rin_DIR+filename, "rb")) as openfile:
while True:
try:
objects.append(pickle.load(openfile))
except EOFError:
break
print("Dataset loaded")
except OSError:
print("File not found : " + rin_DIR + filename)
exit(1)
# Creation of a directory to extract RINs from the pickle file to individual files
try:
os.makedirs(rin_DIR + "Subfiles", exist_ok=True)
except OSError:
print("Creation of the directory %s failed" % (rin_DIR + "Subfiles"))
exit(1)
# Loop on every CaRNAval module and extract it from the Python object to flat text file
n_modules = len(objects[0]) # ? to
for i in range(1,1+n_modules):
motif = objects[0][i].graph
f = open(rin_DIR + "Subfiles/" + str(i-1) + ".txt", "w+")
f.write("ntA,ntB,long_range;...\n")
components = []
comp = []
nodes = list(motif)
nodes.sort()
for node in nodes:
if comp == []:
comp.append(node)
else:
print ("Successfully created the directory %s " % (dir + "Subfiles"))
header_link = "ntA,ntB,long_range;...\n"
header_comp = "pos;k;seq\n"
for i in range(1,338):
motif = objects[0][i].graph
f = open( dir + "Subfiles/" + str(i-1) + ".txt" , "w+" )
f.write(header_link)
components = []
comp = []
nodes = list(motif)
nodes.sort()
for node in nodes:
if comp == []:
comp.append(node)
else:
if comp[-1] + 1 != node : #not the same component
components.append(comp)
comp = []
comp.append(node)
else :
comp.append(node)
components.append(comp)
#print(nodes)
liaisons = ""
edges = list(motif.edges())
for a in edges:
if motif.edges[a]['label'] == 'CWW' :
ntA = nodes.index(a[0])
ntB = nodes.index(a[1])
if ntA <= ntB :
liaisons += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";"
f.write(liaisons + "\n")
f.write(header_comp)
num_nt = -1
for a in components:
seq = ""
data_comp = str(num_nt+1)
for b in a:
num_nt += 1
#sometimes in the nxpicled file, a node has the attribute "realnt", and sometimes "real_nt", but it's the same thing
try:
seq += motif.nodes[b]["realnt"]
except:
seq += motif.nodes[b]["real_nt"]
data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
f.write(data_comp)
f.close()
print(str(i-1) + ".txt created")
print("Successfully parsed "+dir+filename)
if comp[-1] + 1 != node : #not the same component
components.append(comp)
comp = []
comp.append(node)
else :
comp.append(node)
components.append(comp)
#print(nodes)
basepairs = ""
edges = list(motif.edges())
for a in edges:
if motif.edges[a]['label'] == 'CWW' :
ntA = nodes.index(a[0])
ntB = nodes.index(a[1])
if ntA <= ntB :
basepairs += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";"
f.write(basepairs + "\n")
f.write("pos;k;seq\n")
num_nt = -1
for a in components:
seq = ""
data_comp = str(num_nt+1)
for b in a:
num_nt += 1
# sometimes in the nxpicled file, a node has the attribute "realnt",
# and sometimes "real_nt", but it's the same thing
try:
seq += motif.nodes[b]["realnt"]
except:
seq += motif.nodes[b]["real_nt"]
data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
f.write(data_comp)
f.close()
# print(str(i-1) + ".txt created")
print("Successfully parsed "+filename, ", now individual RINs are saved in Subfiles/ folder.", sep='')
......
>_PKB15:_eggplant_mosaic_virus_(EMV)
UGGGUGCGACUCCCCCCCCUCCCGUGGGUCAACGGGAACCA
..[[.......]]....[[[((((((]]]...))))))... + rna3dmotif24 81.0000000 5.8860996
..((((...)).......))((((((......))))))... 0.0000000 6.6571104
__'CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE_'_(PDB_00376)
GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
.(((((((.((...)).))..(((.((((([[[....)))))..)))((((...]]].))))..))))).... + 2JYM.A.1 0.7737056 17.7058440
.((((((((((...)))....(((.((((([[.....)))))..)))((((....]].))))))))))).... 0.0000000 19.1791150