Created folders scripts/ and figures/

Louis BECQUEY
Commit 5e64a47ece6566cae55c88111074fc88ace5ce0a 5e64a47e 1 parent 8b5d3488
Showing 24 changed files with 103 additions and 129 deletions
ZDFS33.fa
debug_opti.txt
debug_opti2.txt
best_MCCs.png → figures/best_MCCs.png
compare_subopt_MOIP.png → figures/compare_subopt_MOIP.png
detailed_stats.png → figures/detailed_stats.png
pareto_visualizer_ext_A_pk.png → figures/pareto_visualizer_ext_A_pk.png
pareto_visualizer_ext_A_ssd.png → figures/pareto_visualizer_ext_A_ssd.png
pareto_visualizer_ext_B_pk.png → figures/pareto_visualizer_ext_B_pk.png
pareto_visualizer_ext_B_ssd.png → figures/pareto_visualizer_ext_B_ssd.png
log_of_the_run_full.sh
nohup.out
nohup_bl.out
nohup_full.out
nohup_full2.out
output.txt
benchmark.py → scripts/benchmark.py
benchmark_longueur.py → scripts/benchmark_on_seq_length.py
build_BiORSEO_docker_image_ubuntu18.sh → scripts/build_BiORSEO_docker_image_ubuntu18.sh
deploy_BiORSEO_docker_image_linux.sh → scripts/deploy_BiORSEO_docker_image_linux.sh
--- a/ZDFS33.fa deleted 100644 → 0
View file @8b5d348
+++ b/ZDFS33.fa deleted 100644 → 0
View file @8b5d348
- >__'ZDFS33 : 0-150'
- UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCC
--- a/debug_opti.txt deleted 100644 → 0
View file @8b5d348
+++ b/debug_opti.txt deleted 100644 → 0
View file @8b5d348
--- a/debug_opti2.txt deleted 100644 → 0
View file @8b5d348
+++ b/debug_opti2.txt deleted 100644 → 0
View file @8b5d348
--- a/best_MCCs.png → figures/best_MCCs.png
View file @5e64a47
+++ b/best_MCCs.png → figures/best_MCCs.png
View file @5e64a47
--- a/compare_subopt_MOIP.png → figures/compare_subopt_MOIP.png
View file @5e64a47
+++ b/compare_subopt_MOIP.png → figures/compare_subopt_MOIP.png
View file @5e64a47
--- a/detailed_stats.png → figures/detailed_stats.png
View file @5e64a47
+++ b/detailed_stats.png → figures/detailed_stats.png
View file @5e64a47
--- a/pareto_visualizer_ext_A_pk.png → figures/pareto_visualizer_ext_A_pk.png
View file @5e64a47
+++ b/pareto_visualizer_ext_A_pk.png → figures/pareto_visualizer_ext_A_pk.png
View file @5e64a47
--- a/pareto_visualizer_ext_A_ssd.png → figures/pareto_visualizer_ext_A_ssd.png
View file @5e64a47
+++ b/pareto_visualizer_ext_A_ssd.png → figures/pareto_visualizer_ext_A_ssd.png
View file @5e64a47
--- a/pareto_visualizer_ext_B_pk.png → figures/pareto_visualizer_ext_B_pk.png
View file @5e64a47
+++ b/pareto_visualizer_ext_B_pk.png → figures/pareto_visualizer_ext_B_pk.png
View file @5e64a47
--- a/pareto_visualizer_ext_B_ssd.png → figures/pareto_visualizer_ext_B_ssd.png
View file @5e64a47
+++ b/pareto_visualizer_ext_B_ssd.png → figures/pareto_visualizer_ext_B_ssd.png
View file @5e64a47
--- a/log_of_the_run_full.sh deleted 100644 → 0
View file @8b5d348
+++ b/log_of_the_run_full.sh deleted 100644 → 0
View file @8b5d348
--- a/nohup.out deleted 100644 → 0
View file @8b5d348
+++ b/nohup.out deleted 100644 → 0
View file @8b5d348
--- a/nohup_bl.out deleted 100644 → 0
View file @8b5d348
+++ b/nohup_bl.out deleted 100644 → 0
View file @8b5d348
- 100 first nucleotides : 8 solutions in 18.908212661743164 seconds, using 622568 kb of RAM
- Traceback (most recent call last):
-   File "benchmark_longueur.py", line 23, in <module>
-     output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
-   File "/usr/local/lib/python3.8/subprocess.py", line 411, in check_output
-     return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
-   File "/usr/local/lib/python3.8/subprocess.py", line 512, in run
-     raise CalledProcessError(retcode, process.args,
- subprocess.CalledProcessError: Command '['./bin/biorseo', '-d', './data/modules/DESC', '-s', './ZDFS33.fa', '-v']' died with <Signals.SIGKILL: 9>.
--- a/nohup_full.out deleted 100644 → 0
View file @8b5d348
+++ b/nohup_full.out deleted 100644 → 0
View file @8b5d348
--- a/nohup_full2.out deleted 100644 → 0
View file @8b5d348
+++ b/nohup_full2.out deleted 100644 → 0
View file @8b5d348
--- a/output.txt deleted 100644 → 0
View file @8b5d348
+++ b/output.txt deleted 100644 → 0
View file @8b5d348
--- a/benchmark.py → scripts/benchmark.py
View file @5e64a47
+++ b/benchmark.py → scripts/benchmark.py
View file @5e64a47
--- a/benchmark_longueur.py → scripts/benchmark_on_seq_length.py
View file @5e64a47
+++ b/benchmark_longueur.py → scripts/benchmark_on_seq_length.py
View file @5e64a47
@@ -3,8 +3,11 @@ import subprocess
 import time
 import resource
 
+ # Take an RNA sequence and cut it into prefixes ranging from 100 bases to its full length,
+ # then measure computation time, peak memory, and number of solutions for each length.
 
- 
+ # This RNA is actually a 16S rRNA from PDB 1J5E.
+ # http://ndbserver.rutgers.edu/service/ndb/atlas/summary
 seq = "UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU"
 
 step = 100
@@ -13,12 +16,13 @@ n = len(seq)
 while step < len(seq)+50:
 	sub_seq = seq[0:(min(step,n))]
 
- 	fasta = open("ZDFS33.fa", 'w')
+ 	# write the sequence to file
+ 	fasta = open("data/fasta/ZDFS33.fa", 'w')
 	fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq)
 	fasta.close()
 
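+ 	# run biorseo on it, with default options (NOTE(review): -s below still points to ./ZDFS33.fa, but the file is now written to data/fasta/ZDFS33.fa -- confirm the path)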
 	cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", "./ZDFS33.fa", "-v"]
- 
 	old_time = time.time()
 	output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
 	run_time = time.time() - old_time
--- a/build_BiORSEO_docker_image_ubuntu18.sh → scripts/build_BiORSEO_docker_image_ubuntu18.sh
View file @5e64a47
+++ b/build_BiORSEO_docker_image_ubuntu18.sh → scripts/build_BiORSEO_docker_image_ubuntu18.sh
View file @5e64a47
@@ -6,6 +6,7 @@ echo "- CPLEX academic version: cplex_installer_12.8_Student.bin";
 echo "- Nupack header files: nupack_3.2.2.tar.gz";
 exit 0;
 
+ cd ../
 THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
 ####################################################### Dependencies ##############################################################
@@ -14,7 +15,7 @@ sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 1
 sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100
 
 # CPLEX: only to build biorseo
- # HERE YOU SHOULD GET YOU ROWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
+ # HERE YOU SHOULD GET YOUR OWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
 chmod +x cplex_installer_12.8_Student.bin
 printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin
 rm cplex_installer_12.8_Student.bin
--- a/deploy_BiORSEO_docker_image_linux.sh → scripts/deploy_BiORSEO_docker_image_linux.sh
View file @5e64a47
+++ b/deploy_BiORSEO_docker_image_linux.sh → scripts/deploy_BiORSEO_docker_image_linux.sh
View file @5e64a47
@@ -2,6 +2,8 @@
 #!/bin/bash
 ######################################################## RNA modules ##############################################################
 
+ cd ../
+ 
 # Rna3Dmotifs data
 mkdir -p data/modules/DESC
 wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz
@@ -26,6 +28,7 @@ sudo -H pip3 install networkx numpy regex wrapt biopython
 git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
 cd BayesPairing
 sudo -H pip3 install .
+ 
 # Train Bayes Pairing (it has been installed on the image and the source has been deleted, we train the models now, and will remount it as volume at run time)
 cd bayespairing/src
 python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
--- a/pareto_visualizer.py → scripts/pareto_visualizer.py
View file @5e64a47
+++ b/pareto_visualizer.py → scripts/pareto_visualizer.py
View file @5e64a47
@@ -421,10 +421,7 @@ if extension == "all":
     for a in ax:
         a.label_outer()
     plt.subplots_adjust(bottom=0.2, top=0.9, left=0.07, right=0.98, hspace=0.05, wspace = 0.05)
-     plt.savefig("pareto_visualizerD.png")
-     
- 
-     
+     plt.savefig("pareto_visualizerD.png")  
 else:
     fig, ax = plt.subplots(2,1, figsize=(6,5))
     plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4)
--- a/transform_pickle.py → scripts/transform_CaRNAval_pickle.py
View file @5e64a47
+++ b/transform_pickle.py → scripts/transform_CaRNAval_pickle.py
View file @5e64a47
- import os
+ #!/usr/bin/python3
 
+ # This script's purpose is to extract information about the CaRNAval
+ # RINs from a Python pickle object containing RINs from their RIN.py class.
+ # We do this because the official JSON file is hard to understand, and Antoine Soulé
+ # recommended the pickle.
 
+ import networkx, os, pickle, sys
 
 if __name__=="__main__":
 
-     ##nxpickled import
-     dir = os.getcwd() + "/data/modules/CaRNAval/"
+     
+     rin_DIR = os.getcwd() + "/../data/modules/CaRNAval/"
+     filename = "CaRNAval_1_as_dictionnary.nxpickled"
 
+     # Check that we can find CaRNAval RINs, and load the dataset
     try:
-         import sys
-         sys.path.append(os.path.abspath(dir))
+         sys.path.append(os.path.abspath(rin_DIR))
         import RIN
- 
     except:
-         print("File not found : " + dir + "RIN.py")
- 
-     else:
-         filename = "CaRNAval_1_as_dictionnary.nxpickled"
- 
-         try:
-             import networkx
-             import pickle
- 
-             objects = []
- 
-             with (open(dir+filename, "rb")) as openfile:
-                 while True:
-                     try:
-                         objects.append(pickle.load(openfile))
-                     except EOFError:
-                         break
- 
-             print("Dataset loaded")
- 
-         except OSError:
-             print("File not found : " + dir + filename)
+         print("File not found:" + rin_DIR + "RIN.py")
+         exit(1)
 
-         else:
- 
- 
-             ##Creation of a file for each RIN
-             try:
-                 os.mkdir(dir + "Subfiles")
-             except OSError:
-                 print ("Creation of the directory %s failed" % (dir + "Subfiles") + " : maybe it already exists ?")
+     try:
+         objects = []
+         with (open(rin_DIR+filename, "rb")) as openfile:
+             while True:
+                 try:
+                     objects.append(pickle.load(openfile))
+                 except EOFError:
+                     break
+         print("Dataset loaded")
+     except OSError:
+         print("File not found : " + rin_DIR + filename)
+         exit(1)
+ 
+     # Creation of a directory to extract RINs from the pickle file to individual files
+     try:
+         os.makedirs(rin_DIR + "Subfiles", exist_ok=True)
+     except OSError:
+         print("Creation of the directory %s failed" % (rin_DIR + "Subfiles"))
+         exit(1)
+ 
+     # Loop on every CaRNAval module and extract it from the Python object to flat text file
+     n_modules = len(objects[0]) # replaces the former hard-coded 337 modules; assumes objects[0] is keyed 1..n_modules -- TODO confirm
+     for i in range(1,1+n_modules):
+         motif = objects[0][i].graph
+         f = open(rin_DIR + "Subfiles/" + str(i-1) + ".txt", "w+")
+         f.write("ntA,ntB,long_range;...\n")
+ 
+         components = []
+         comp = []
+         nodes = list(motif)
+         nodes.sort()
+         for node in nodes:
+             if comp == []:
+                 comp.append(node)
             else:
-                 print ("Successfully created the directory %s " % (dir + "Subfiles"))
- 
-             header_link = "ntA,ntB,long_range;...\n"
-             header_comp = "pos;k;seq\n"
- 
-             for i in range(1,338):
-                 motif = objects[0][i].graph
-                 f = open( dir + "Subfiles/" + str(i-1) + ".txt"  ,  "w+" )
-                 f.write(header_link)
- 
-                 components = []
-                 comp = []
-                 nodes = list(motif)
-                 nodes.sort()
-                 for node in nodes:
-                     if comp == []:
-                         comp.append(node)
-                     else:
-                         if comp[-1] + 1 != node : #not the same component
-                             components.append(comp)
-                             comp = []
-                             comp.append(node)
-                         else :
-                             comp.append(node)
- 
-                 components.append(comp)
- 
-                 #print(nodes)
- 
-                 liaisons = ""
-                 edges = list(motif.edges())
-                 for a in edges:
-                     if motif.edges[a]['label'] == 'CWW' :
-                         ntA = nodes.index(a[0])
-                         ntB = nodes.index(a[1])
- 
-                         if ntA <= ntB :
-                             liaisons += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";"
- 
-                 f.write(liaisons + "\n")
-                 f.write(header_comp)
- 
-                 num_nt = -1
-                 for a in components:
-                     seq = ""
-                     data_comp = str(num_nt+1)
-                     for b in a:
-                         num_nt += 1
- 
-                         #sometimes in the nxpicled file, a node has the attribute "realnt", and sometimes "real_nt", but it's the same thing
-                         try:
-                             seq += motif.nodes[b]["realnt"]
- 
-                         except:
-                             seq += motif.nodes[b]["real_nt"]
- 
-                     data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
- 
-                     f.write(data_comp)
- 
- 
-                 f.close()
- 
-                 print(str(i-1) + ".txt created")
- 
-             print("Successfully parsed "+dir+filename)
+                 if comp[-1] + 1 != node : #not the same component
+                     components.append(comp)
+                     comp = []
+                     comp.append(node)
+                 else :
+                     comp.append(node)
+         components.append(comp)
+ 
+         #print(nodes)
+ 
+         basepairs = ""
+         edges = list(motif.edges())
+         for a in edges:
+             if motif.edges[a]['label'] == 'CWW' :
+                 ntA = nodes.index(a[0])
+                 ntB = nodes.index(a[1])
+ 
+                 if ntA <= ntB :
+                     basepairs += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";"
+ 
+         f.write(basepairs + "\n")
+         f.write("pos;k;seq\n")
+ 
+         num_nt = -1
+         for a in components:
+             seq = ""
+             data_comp = str(num_nt+1)
+             for b in a:
+                 num_nt += 1
+ 
+                 # sometimes in the nxpickled file, a node has the attribute "realnt", 
+                 # and sometimes "real_nt", but it's the same thing
+                 try:
+                     seq += motif.nodes[b]["realnt"]
+                 except:
+                     seq += motif.nodes[b]["real_nt"]
+             data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
+             f.write(data_comp)
+ 
+         f.close()
+         # print(str(i-1) + ".txt created")
+ 
+     print("Successfully parsed "+filename, ", now individual RINs are saved in Subfiles/ folder.", sep='')
 
--- a/test_opti_EMV_desc_byp_A deleted 100644 → 0
View file @8b5d348
+++ b/test_opti_EMV_desc_byp_A deleted 100644 → 0
View file @8b5d348
- >_PKB15:_eggplant_mosaic_virus_(EMV)
- UGGGUGCGACUCCCCCCCCUCCCGUGGGUCAACGGGAACCA
- ..[[.......]]....[[[((((((]]]...))))))... + rna3dmotif24	81.0000000	5.8860996
- ..((((...)).......))((((((......))))))...	0.0000000	6.6571104
--- a/test_output deleted 100644 → 0
View file @8b5d348
+++ b/test_output deleted 100644 → 0
View file @8b5d348
- __'CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE_'_(PDB_00376)
- GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
- .(((((((.((...)).))..(((.((((([[[....)))))..)))((((...]]].))))..))))).... + 2JYM.A.1	0.7737056	17.7058440
- .((((((((((...)))....(((.((((([[.....)))))..)))((((....]].)))))))))))....	0.0000000	19.1791150