Louis BECQUEY

removed stuff from biorseo

results_*
build_BiORSEO_docker_image_ubuntu18.sh
deploy_BiORSEO_docker_image_linux.sh
INSTALL.md
Readme.md
benchmark_results/
doc/
FROM ubuntu:bionic
# installing dependencies
RUN apt-get update -yq && \
    apt-get upgrade -y && \
    apt-get install -y python3-dev python3-pip openjdk-11-jre libgsl23 libgslcblas0 libboost-program-options-dev libboost-filesystem-dev && \
    rm -rf /var/lib/apt/lists/*
# biorseo itself, already compiled on the host (see the build script)
COPY . /biorseo
# ViennaRNA installer
ADD "https://www.tbi.univie.ac.at/RNA/download/ubuntu/ubuntu_18_04/viennarna_2.4.14-1_amd64.deb" /
# jar3d archive
ADD http://rna.bgsu.edu/data/jar3d/models/jar3d_2014-12-11.jar /
# install ViennaRNA, the Python dependencies and BayesPairing, then clean up
RUN dpkg -i /viennarna_2.4.14-1_amd64.deb && \
    apt-get install -f -y && \
    pip3 install networkx numpy regex wrapt biopython /biorseo/BayesPairing && \
    cd / && \
    rm -rf /biorseo/BayesPairing /viennarna_2.4.14-1_amd64.deb
WORKDIR /biorseo
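
# For reference, the image is built and run from the repository root as documented in
# build_BiORSEO_docker_image_ubuntu18.sh and deploy_BiORSEO_docker_image_linux.sh, e.g.:
#   docker build . -t biorseo
#   docker run -v `pwd`/data/modules:/modules -v `pwd`/BayesPairing/bayespairing:/byp \
#              -v `pwd`/results:/biorseo/results \
#              biorseo ./biorseo.py -i /biorseo/data/fasta/applications.fa --rna3dmotifs --patternmatch --func B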
#!/usr/bin/python3
# This script extracts the CaRNAval RINs from a Python pickle file of RIN.py objects
# and saves each RIN as a flat text file.
# We use the pickle rather than the official JSON file because the JSON is hard to
# understand, and Antoine Soulé recommended the pickle.
import networkx, os, pickle, subprocess, sys

if __name__ == "__main__":

    rin_DIR = os.getcwd() + "/../data/modules/RIN/"
    filename = "CaRNAval_1_as_dictionnary.nxpickled"

    # Check that we can find the CaRNAval RINs, and load the dataset
    try:
        sys.path.append(os.path.abspath(rin_DIR))
        import RIN
    except ImportError:
        # We have to download it
        subprocess.run(["wget", '-O', '../data/modules/carnaval_dataset.zip', "http://carnaval.lri.fr/carnaval_dataset.zip"])
        subprocess.run(["unzip", '-ou', '../data/modules/carnaval_dataset.zip', "carnaval_dataset/CaRNAval_1_as_dictionnary.nxpickled", "carnaval_dataset/RIN.py"])
        subprocess.run(["rm", "-rf", "../data/modules/RIN/", "../data/modules/carnaval_dataset.zip"])
        subprocess.run(["mv", "carnaval_dataset/", "../data/modules/RIN/"])
        sys.path.append(os.path.abspath(rin_DIR))
        import RIN

    try:
        objects = []
        with open(rin_DIR + filename, "rb") as openfile:
            while True:
                try:
                    objects.append(pickle.load(openfile))
                except EOFError:
                    break
        print("Dataset loaded")
    except OSError:
        print("File not found: " + rin_DIR + filename)
        exit(1)

    # Create a directory where the RINs extracted from the pickle file will be saved as individual files
    try:
        os.makedirs(rin_DIR + "Subfiles", exist_ok=True)
    except OSError:
        print("Creation of the directory %s failed" % (rin_DIR + "Subfiles"))
        exit(1)

    # Loop over every CaRNAval module and extract it from the Python object to a flat text file
    n_modules = len(objects[0])
    for i in range(1, 1 + n_modules):
        motif = objects[0][i].graph
        f = open(rin_DIR + "Subfiles/" + str(i-1) + ".txt", "w+")
        f.write("ntA,ntB,long_range;...\n")

        # Group consecutive node numbers into strand components
        components = []
        comp = []
        nodes = list(motif)
        nodes.sort()
        for node in nodes:
            if comp == []:
                comp.append(node)
            elif comp[-1] + 1 != node:  # not the same component
                components.append(comp)
                comp = [node]
            else:
                comp.append(node)
        components.append(comp)
        # print(nodes)

        # Extract the canonical (cWW) base pairs, using 0-based positions within the module
        basepairs = ""
        edges = list(motif.edges())
        for a in edges:
            if motif.edges[a]['label'] == 'CWW':
                ntA = nodes.index(a[0])
                ntB = nodes.index(a[1])
                if ntA <= ntB:
                    basepairs += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";"
        f.write(basepairs + "\n")

        # Write one line per component: first and last position, length, and sequence
        f.write("pos;k;seq\n")
        num_nt = -1
        for a in components:
            seq = ""
            data_comp = str(num_nt + 1)
            for b in a:
                num_nt += 1
                # Sometimes in the nxpickled file a node has the attribute "realnt",
                # and sometimes "real_nt", but it is the same thing.
                try:
                    seq += motif.nodes[b]["realnt"]
                except KeyError:
                    seq += motif.nodes[b]["real_nt"]
            data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
            f.write(data_comp)
        f.close()
        # print(str(i-1) + ".txt created")

    print("Successfully parsed " + filename, ", now individual RINs are saved in the Subfiles/ folder.", sep='')
# ============================ IMPORTS ====================================
import subprocess
import time
import resource
# Take an RNA sequence and truncate it to increasing lengths, from 100 bases up to the full sequence,
# then measure computation time, peak memory, and the number of solutions for each length.
# This RNA is actually a 16S rRNA from PDB 1J5E.
# http://ndbserver.rutgers.edu/service/ndb/atlas/summary
seq = "UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU"
step = 100
n = len(seq)
while step < len(seq) + 50:
    sub_seq = seq[0:min(step, n)]

    # write the truncated sequence to a FASTA file
    fasta = open("data/fasta/ZDFS33.fa", 'w')
    fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq)
    fasta.close()

    # run biorseo on it, with default options
    cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", "./data/fasta/ZDFS33.fa", "-v"]
    old_time = time.time()
    output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
    run_time = time.time() - old_time
    # ru_maxrss with RUSAGE_CHILDREN is the peak resident set size (in kB on Linux) of the largest
    # terminated child so far, so this value is a running maximum over all iterations.
    max_ram = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss

    # parse the number of solutions from the last lines of biorseo's output
    for line in output:
        if "Quitting because combinatorial issues" in line:
            nb_sol = -1
        elif "solutions kept" in line:
            nb_sol = line.split(",")[1].split()[0]

    print(len(sub_seq), "first nucleotides:", nb_sol, "solutions in", run_time, "seconds, using", max_ram, "kB of RAM")
    step += 50
#!/bin/bash
echo "WARNING: The purpose of this file is to document how the docker image was built.";
echo "You cannot execute it directly, because of licensing reasons. Please get your own:";
echo "- CPLEX academic version: cplex_installer_12.8_Student.bin";
echo "- Nupack header files: nupack_3.2.2.tar.gz";
exit 0;
cd ../
THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
####################################################### Dependencies ##############################################################
sudo apt install -y clang-7 cmake make automake libboost-program-options-dev libboost-filesystem-dev openjdk-11-jre
sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 100
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100
# CPLEX: only to build biorseo
# HERE YOU SHOULD GET YOUR OWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
chmod +x cplex_installer_12.8_Student.bin
printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin
rm cplex_installer_12.8_Student.bin
# Eigen: only to build biorseo (no need to give it to the docker image)
wget http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz -O eigen_src.tar.gz
tar -xf eigen_src.tar.gz
cd eigen-eigen-323c052e1731
mkdir build
cd build
cmake ..
sudo make install
cd ../..
rm -rf eigen_src.tar.gz eigen-eigen-323c052e1731
# Nupack: only to build biorseo (no need to give it to the docker image)
#curl -u yourname@yourUni.com:yourPassword http://www.nupack.org/downloads/serve_file/nupack3.2.2.tar.gz --output nupack3.2.2.tar.gz
tar -xf nupack3.2.2.tar.gz
cd nupack3.2.2
mkdir build
cd build
cmake ..
make -j8
sudo make install
cd ../..
sudo cp nupack3.2.2/src/thermo/*.h /usr/local/include/nupack/thermo/
rm -rf nupack3.2.2.tar.gz nupack3.2.2/
# BayesPairing: install on the docker image (done by the Dockerfile)
git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
######################################################### Build Biorseo ###########################################################
# build here, install later on the docker image (done by the Dockerfile)
mkdir -p results
make -j 8
make clean
rm -rf doc/ obj/
######################################################## Build Docker container ##################################################
# Execute the Dockerfile and build the image
docker build . -t biorseo
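
# Optional sanity check (not part of the original procedure): confirm that the image was created
# and that the repository was copied into it at /biorseo (the image's WORKDIR).
docker images biorseo
docker run --rm biorseo ls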
#!/bin/bash
######################################################## RNA modules ##############################################################
cd ../
# Rna3Dmotifs data
mkdir -p data/modules/DESC
wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz
tar -xvzf CATALOGUE.tgz
mv No_Redondance_DESC/*.desc data/modules/DESC/
rm -r No_Redondance_VIEW3D No_Redondance_DESC CATALOGUE.tgz
# The RNA 3D Motif Atlas
mkdir -p data/modules/BGSU
wget http://rna.bgsu.edu/data/jar3d/models/HL/HL_3.2_models.zip
unzip HL_3.2_models.zip
mv HL data/modules/BGSU
rm HL_3.2_models.zip
wget http://rna.bgsu.edu/data/jar3d/models/IL/IL_3.2_models.zip
unzip IL_3.2_models.zip
mv IL data/modules/BGSU
rm IL_3.2_models.zip
# Install BayesPairing
sudo -H pip3 install --upgrade pip
sudo -H pip3 install networkx numpy regex wrapt biopython
git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
cd BayesPairing
sudo -H pip3 install .
# Train BayesPairing (it is installed in the docker image and its source deleted there; we train the models now, and will remount the trained models as a volume at run time)
cd bayespairing/src
python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
python3 parse_sequences.py -d 3dmotifatlas -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
cd ../../..
######################################################## Run it ##############################################################
# docker run -v `pwd`/data/modules:/modules -v `pwd`/BayesPairing/bayespairing:/byp -v `pwd`/results:/biorseo/results biorseo ./biorseo.py -i /biorseo/data/fasta/applications.fa --rna3dmotifs --patternmatch --func B