Showing
16 changed files
with
0 additions
and
276 deletions
.dockerignore
deleted
100644 → 0
Dockerfile
deleted
100644 → 0
1 | -FROM ubuntu:bionic | ||
2 | - | ||
3 | -# installing dependencies | ||
4 | -RUN apt-get update -yq && \ | ||
5 | - apt-get upgrade -y && \ | ||
6 | - apt-get install -y python3-dev python3-pip openjdk-11-jre libgsl23 libgslcblas0 libboost-program-options-dev libboost-filesystem-dev && \ | ||
7 | - rm -rf /var/lib/apt/lists/* | ||
8 | - | ||
9 | -# compiled biorseo | ||
10 | -COPY . /biorseo | ||
11 | -# ViennaRNA installer | ||
12 | -ADD "https://www.tbi.univie.ac.at/RNA/download/ubuntu/ubuntu_18_04/viennarna_2.4.14-1_amd64.deb" / | ||
13 | -# jar3d archive | ||
14 | -ADD http://rna.bgsu.edu/data/jar3d/models/jar3d_2014-12-11.jar / | ||
15 | - | ||
16 | -# install codes | ||
17 | -RUN dpkg -i /viennarna_2.4.14-1_amd64.deb && \ | ||
18 | - apt-get install -f && \ | ||
19 | - \ | ||
20 | - pip3 install networkx numpy regex wrapt biopython /biorseo/BayesPairing && \ | ||
21 | - \ | ||
22 | - cd / && \ | ||
23 | - rm -rf /biorseo/BayesPairing /ViennaRNA-2.4.13 /ViennaRNA-2.4.13.tar.gz | ||
24 | -WORKDIR /biorseo | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
benchmark.py
deleted
100755 → 0
This diff is collapsed. Click to expand it.
figures/best_MCCs.png
deleted
100644 → 0
99.5 KB
figures/compare_subopt_MOIP.png
deleted
100644 → 0
48.7 KB
figures/detailed_stats.png
deleted
100644 → 0
32.4 KB
37.2 KB
38.2 KB
36.1 KB
37 KB
rna1999.dG
deleted
100644 → 0
This diff is collapsed. Click to expand it.
scripts/Install_CaRNAval_RINs.py
deleted
100644 → 0
1 | -#!/usr/bin/python3 | ||
2 | - | ||
3 | -# This script's purpose is to extract information about the CaRNAval | ||
4 | -# RINs from a Python pickle object containing RINs from their RIN.py class. | ||
5 | -# We do this because the official JSON file is hard to understand, and Antoine Soulé | ||
6 | -# recommended the pickle. | ||
7 | - | ||
8 | -import networkx, os, pickle, subprocess, sys | ||
9 | - | ||
10 | -if __name__=="__main__": | ||
11 | - | ||
12 | - | ||
13 | - rin_DIR = os.getcwd() + "/../data/modules/RIN/" | ||
14 | - filename = "CaRNAval_1_as_dictionnary.nxpickled" | ||
15 | - | ||
16 | - # Check that we can find CaRNAval RINs, and load the dataset | ||
17 | - try: | ||
18 | - sys.path.append(os.path.abspath(rin_DIR)) | ||
19 | - import RIN | ||
20 | - except ImportError: | ||
21 | - # We have to download it | ||
22 | - subprocess.run(["wget", '-O', '../data/modules/carnaval_dataset.zip', "http://carnaval.lri.fr/carnaval_dataset.zip"]) | ||
23 | - subprocess.run(["unzip", '-ou', '../data/modules/carnaval_dataset.zip', "carnaval_dataset/CaRNAval_1_as_dictionnary.nxpickled", "carnaval_dataset/RIN.py"]) | ||
24 | - subprocess.run(["rm", "-f", "../data/modules/RIN/", "../data/modules/carnaval_dataset.zip"]) | ||
25 | - subprocess.run(["mv", "carnaval_dataset/", "../data/modules/RIN/"]) | ||
26 | - sys.path.append(os.path.abspath(rin_DIR)) | ||
27 | - import RIN | ||
28 | - | ||
29 | - try: | ||
30 | - objects = [] | ||
31 | - with (open(rin_DIR+filename, "rb")) as openfile: | ||
32 | - while True: | ||
33 | - try: | ||
34 | - objects.append(pickle.load(openfile)) | ||
35 | - except EOFError: | ||
36 | - break | ||
37 | - print("Dataset loaded") | ||
38 | - except OSError: | ||
39 | - print("File not found : " + rin_DIR + filename) | ||
40 | - exit(1) | ||
41 | - | ||
42 | - # Creation of a directory to extract RINs from the pickle file to individual files | ||
43 | - try: | ||
44 | - os.makedirs(rin_DIR + "Subfiles", exist_ok=True) | ||
45 | - except OSError: | ||
46 | - print("Creation of the directory %s failed" % (rin_DIR + "Subfiles")) | ||
47 | - exit(1) | ||
48 | - | ||
49 | - # Loop on every CaRNAval module and extract it from the Python object to flat text file | ||
50 | - n_modules = len(objects[0]) # ? to | ||
51 | - for i in range(1,1+n_modules): | ||
52 | - motif = objects[0][i].graph | ||
53 | - f = open(rin_DIR + "Subfiles/" + str(i-1) + ".txt", "w+") | ||
54 | - f.write("ntA,ntB,long_range;...\n") | ||
55 | - | ||
56 | - components = [] | ||
57 | - comp = [] | ||
58 | - nodes = list(motif) | ||
59 | - nodes.sort() | ||
60 | - for node in nodes: | ||
61 | - if comp == []: | ||
62 | - comp.append(node) | ||
63 | - else: | ||
64 | - if comp[-1] + 1 != node : #not the same component | ||
65 | - components.append(comp) | ||
66 | - comp = [] | ||
67 | - comp.append(node) | ||
68 | - else : | ||
69 | - comp.append(node) | ||
70 | - components.append(comp) | ||
71 | - | ||
72 | - #print(nodes) | ||
73 | - | ||
74 | - basepairs = "" | ||
75 | - edges = list(motif.edges()) | ||
76 | - for a in edges: | ||
77 | - if motif.edges[a]['label'] == 'CWW' : | ||
78 | - ntA = nodes.index(a[0]) | ||
79 | - ntB = nodes.index(a[1]) | ||
80 | - | ||
81 | - if ntA <= ntB : | ||
82 | - basepairs += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";" | ||
83 | - | ||
84 | - f.write(basepairs + "\n") | ||
85 | - f.write("pos;k;seq\n") | ||
86 | - | ||
87 | - num_nt = -1 | ||
88 | - for a in components: | ||
89 | - seq = "" | ||
90 | - data_comp = str(num_nt+1) | ||
91 | - for b in a: | ||
92 | - num_nt += 1 | ||
93 | - | ||
94 | - # sometimes in the nxpickled file, a node has the attribute "realnt", | ||
95 | - # and sometimes "real_nt", but it's the same thing | ||
96 | - try: | ||
97 | - seq += motif.nodes[b]["realnt"] | ||
98 | - except: | ||
99 | - seq += motif.nodes[b]["real_nt"] | ||
100 | - data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n" | ||
101 | - f.write(data_comp) | ||
102 | - | ||
103 | - f.close() | ||
104 | - # print(str(i-1) + ".txt created") | ||
105 | - | ||
106 | - print("Successfully parsed "+filename, ", now individual RINs are saved in Subfiles/ folder.", sep='') | ||
107 | - |
scripts/benchmark_on_seq_length.py
deleted
100644 → 0
1 | -# ============================ IMPORTS ==================================== | ||
2 | -import subprocess | ||
3 | -import time | ||
4 | -import resource | ||
5 | - | ||
6 | -# take an RNA sequence and cut it into prefixes, from 100 bases up to its actual length, | ||
7 | -# then measure computation time, peak memory, and number of solutions for each length | ||
8 | - | ||
9 | -# This RNA is actually a 16S rRNA from PDB 1J5E. | ||
10 | -# http://ndbserver.rutgers.edu/service/ndb/atlas/summary | ||
11 | -seq = "UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU" | ||
12 | - | ||
13 | -step = 100 | ||
14 | -n = len(seq) | ||
15 | - | ||
16 | -while step < len(seq)+50: | ||
17 | - sub_seq = seq[0:(min(step,n))] | ||
18 | - | ||
19 | - # write the sequence to file | ||
20 | - fasta = open("data/fasta/ZDFS33.fa", 'w') | ||
21 | - fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq) | ||
22 | - fasta.close() | ||
23 | - | ||
24 | - # run biorseo on it, with default options | ||
25 | - cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", "./ZDFS33.fa", "-v"] | ||
26 | - old_time = time.time() | ||
27 | - output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:] | ||
28 | - run_time = time.time() - old_time | ||
29 | - max_ram = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss | ||
30 | - | ||
31 | - for line in output : | ||
32 | - if "Quitting because combinatorial issues" in line : | ||
33 | - nb_sol = -1 | ||
34 | - elif "solutions kept" in line : | ||
35 | - nb_sol = line.split(",")[1].split()[0] | ||
36 | - | ||
37 | - print(len(sub_seq), "first nucleotides :", nb_sol, "solutions in", run_time, "seconds, using", max_ram, "kb of RAM") | ||
38 | - | ||
39 | - step += 50 |
1 | -#!/bin/bash | ||
2 | - | ||
3 | -echo "WARNING: The purpose of this file is to document how the docker image was built."; | ||
4 | -echo "You cannot execute it directly, because of licensing reasons. Please get your own:"; | ||
5 | -echo "- CPLEX academic version: cplex_installer_12.8_Student.bin"; | ||
6 | -echo "- Nupack header files: nupack_3.2.2.tar.gz"; | ||
7 | -exit 0; | ||
8 | - | ||
9 | -cd ../ | ||
10 | -THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" | ||
11 | - | ||
12 | -####################################################### Dependencies ############################################################## | ||
13 | -sudo apt install -y clang-7 cmake make automake libboost-program-options-dev libboost-filesystem-dev openjdk-11-jre | ||
14 | -sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 100 | ||
15 | -sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100 | ||
16 | - | ||
17 | -# CPLEX: only to build biorseo | ||
18 | -# HERE YOU SHOULD GET YOUR OWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore. | ||
19 | -chmod +x cplex_installer_12.8_Student.bin | ||
20 | -printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin | ||
21 | -rm cplex_installer_12.8_Student.bin | ||
22 | - | ||
23 | -# Eigen: only to build biorseo (no need to give it to the docker image) | ||
24 | -wget http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz -O eigen_src.tar.gz | ||
25 | -tar -xf eigen_src.tar.gz | ||
26 | -cd eigen-eigen-323c052e1731 | ||
27 | -mkdir build | ||
28 | -cd build | ||
29 | -cmake .. | ||
30 | -sudo make install | ||
31 | -cd ../.. | ||
32 | -rm -rf eigen_src.tar.gz eigen-eigen-323c052e1731 | ||
33 | - | ||
34 | -# Nupack: only to build biorseo (no need to give it to the docker image) | ||
35 | -#curl -u yourname@yourUni.com:yourPassword http://www.nupack.org/downloads/serve_file/nupack3.2.2.tar.gz --output nupack3.2.2.tar.gz | ||
36 | -tar -xf nupack3.2.2.tar.gz | ||
37 | -cd nupack3.2.2 | ||
38 | -mkdir build | ||
39 | -cd build | ||
40 | -cmake .. | ||
41 | -make -j8 | ||
42 | -sudo make install | ||
43 | -cd ../.. | ||
44 | -sudo cp nupack3.2.2/src/thermo/*.h /usr/local/include/nupack/thermo/ | ||
45 | -rm -rf nupack3.2.2.tar.gz nupack3.2.2/ | ||
46 | - | ||
47 | -# BayesPairing: install on the docker image (done by the Dockerfile) | ||
48 | -git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing | ||
49 | - | ||
50 | -######################################################### Build Biorseo ########################################################### | ||
51 | -# build here, install later on the docker image (done by the Dockerfile) | ||
52 | -mkdir -p results | ||
53 | -make -j 8 | ||
54 | -make clean | ||
55 | -rm -rf doc/ obj/ | ||
56 | - | ||
57 | -######################################################## Build Docker container ################################################## | ||
58 | -# Execute the Dockerfile and build the image | ||
59 | -docker build . -t biorseo |
1 | - | ||
2 | -#!/bin/bash | ||
3 | -######################################################## RNA modules ############################################################## | ||
4 | - | ||
5 | -cd ../ | ||
6 | - | ||
7 | -# Rna3Dmotifs data | ||
8 | -mkdir -p data/modules/DESC | ||
9 | -wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz | ||
10 | -tar -xvzf CATALOGUE.tgz | ||
11 | -mv No_Redondance_DESC/*.desc data/modules/DESC/ | ||
12 | -rm -r No_Redondance_VIEW3D No_Redondance_DESC CATALOGUE.tgz | ||
13 | - | ||
14 | -# The RNA 3D Motif Atlas | ||
15 | -mkdir -p data/modules/BGSU | ||
16 | -wget http://rna.bgsu.edu/data/jar3d/models/HL/HL_3.2_models.zip | ||
17 | -unzip HL_3.2_models.zip | ||
18 | -mv HL data/modules/BGSU | ||
19 | -rm HL_3.2_models.zip | ||
20 | -wget http://rna.bgsu.edu/data/jar3d/models/IL/IL_3.2_models.zip | ||
21 | -unzip IL_3.2_models.zip | ||
22 | -mv IL data/modules/BGSU | ||
23 | -rm IL_3.2_models.zip | ||
24 | - | ||
25 | -# Install BayesPairing | ||
26 | -sudo -H pip3 install --upgrade pip | ||
27 | -sudo -H pip3 install networkx numpy regex wrapt biopython | ||
28 | -git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing | ||
29 | -cd BayesPairing | ||
30 | -sudo -H pip3 install . | ||
31 | - | ||
32 | -# Train BayesPairing (it has been installed on the image and its source has been deleted there; we train the models now, and will mount the bayespairing directory as a volume at run time) | ||
33 | -cd bayespairing/src | ||
34 | -python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............." | ||
35 | -python3 parse_sequences.py -d 3dmotifatlas -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............." | ||
36 | -cd ../../.. | ||
37 | - | ||
38 | -######################################################## Run it ############################################################## | ||
39 | - | ||
40 | -# docker run -v `pwd`/data/modules:/modules -v `pwd`/BayesPairing/bayespairing:/byp -v `pwd`/results:/biorseo/results biorseo ./biorseo.py -i /biorseo/data/fasta/applications.fa --rna3dmotifs --patternmatch --func B | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
scripts/pareto_visualizer.py
deleted
100755 → 0
This diff is collapsed. Click to expand it.
-
Please register or login to post a comment