Louis BECQUEY

removed stuff from biorseo

1 -results_*
2 -build_BiORSEO_docker_image_ubuntu18.sh
3 -deploy_BiORSEO_docker_image_linux.sh
4 -INSTALL.md
5 -Readme.md
6 -benchmark_results/
7 -doc/
1 -FROM ubuntu:bionic
2 -
3 -# installing dependencies
4 -RUN apt-get update -yq && \
5 - apt-get upgrade -y && \
6 - apt-get install -y python3-dev python3-pip openjdk-11-jre libgsl23 libgslcblas0 libboost-program-options-dev libboost-filesystem-dev && \
7 - rm -rf /var/lib/apt/lists/*
8 -
9 -# compiled biorseo
10 -COPY . /biorseo
11 -# ViennaRNA installer
12 -ADD "https://www.tbi.univie.ac.at/RNA/download/ubuntu/ubuntu_18_04/viennarna_2.4.14-1_amd64.deb" /
13 -# jar3d archive
14 -ADD http://rna.bgsu.edu/data/jar3d/models/jar3d_2014-12-11.jar /
15 -
16 -# install codes
17 -RUN dpkg -i /viennarna_2.4.14-1_amd64.deb && \
18 - apt-get install -f && \
19 - \
20 - pip3 install networkx numpy regex wrapt biopython /biorseo/BayesPairing && \
21 - \
22 - cd / && \
23 - rm -rf /biorseo/BayesPairing /ViennaRNA-2.4.13 /ViennaRNA-2.4.13.tar.gz
24 -WORKDIR /biorseo
...\ No newline at end of file ...\ No newline at end of file
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
1 -#!/usr/bin/python3
2 -
3 -# This script's purpose is to extract information about the CaRNAval
4 -# RINs from a Python pickle object containing RINs from their RIN.py class.
5 -# We do this because the official JSON file is hard to understand, and Antoine Soulé
6 -# recommended the pickle.
7 -
8 -import networkx, os, pickle, subprocess, sys
9 -
10 -if __name__=="__main__":
11 -
12 -
13 - rin_DIR = os.getcwd() + "/../data/modules/RIN/"
14 - filename = "CaRNAval_1_as_dictionnary.nxpickled"
15 -
16 - # Check that we can find CaRNAval RINs, and load the dataset
17 - try:
18 - sys.path.append(os.path.abspath(rin_DIR))
19 - import RIN
20 - except ImportError:
21 - # We have to download it
22 - subprocess.run(["wget", '-O', '../data/modules/carnaval_dataset.zip', "http://carnaval.lri.fr/carnaval_dataset.zip"])
23 - subprocess.run(["unzip", '-ou', '../data/modules/carnaval_dataset.zip', "carnaval_dataset/CaRNAval_1_as_dictionnary.nxpickled", "carnaval_dataset/RIN.py"])
24 - subprocess.run(["rm", "-f", "../data/modules/RIN/", "../data/modules/carnaval_dataset.zip"])
25 - subprocess.run(["mv", "carnaval_dataset/", "../data/modules/RIN/"])
26 - sys.path.append(os.path.abspath(rin_DIR))
27 - import RIN
28 -
29 - try:
30 - objects = []
31 - with (open(rin_DIR+filename, "rb")) as openfile:
32 - while True:
33 - try:
34 - objects.append(pickle.load(openfile))
35 - except EOFError:
36 - break
37 - print("Dataset loaded")
38 - except OSError:
39 - print("File not found : " + rin_DIR + filename)
40 - exit(1)
41 -
42 - # Creation of a directory to extract RINs from the pickle file to individual files
43 - try:
44 - os.makedirs(rin_DIR + "Subfiles", exist_ok=True)
45 - except OSError:
46 - print("Creation of the directory %s failed" % (rin_DIR + "Subfiles"))
47 - exit(1)
48 -
49 - # Loop on every CaRNAval module and extract it from the Python object to flat text file
50 - n_modules = len(objects[0]) # ? to
51 - for i in range(1,1+n_modules):
52 - motif = objects[0][i].graph
53 - f = open(rin_DIR + "Subfiles/" + str(i-1) + ".txt", "w+")
54 - f.write("ntA,ntB,long_range;...\n")
55 -
56 - components = []
57 - comp = []
58 - nodes = list(motif)
59 - nodes.sort()
60 - for node in nodes:
61 - if comp == []:
62 - comp.append(node)
63 - else:
64 - if comp[-1] + 1 != node : #not the same component
65 - components.append(comp)
66 - comp = []
67 - comp.append(node)
68 - else :
69 - comp.append(node)
70 - components.append(comp)
71 -
72 - #print(nodes)
73 -
74 - basepairs = ""
75 - edges = list(motif.edges())
76 - for a in edges:
77 - if motif.edges[a]['label'] == 'CWW' :
78 - ntA = nodes.index(a[0])
79 - ntB = nodes.index(a[1])
80 -
81 - if ntA <= ntB :
82 - basepairs += str(ntA) + "," + str(ntB) + "," + str(motif.edges[a]['long_range']) + ";"
83 -
84 - f.write(basepairs + "\n")
85 - f.write("pos;k;seq\n")
86 -
87 - num_nt = -1
88 - for a in components:
89 - seq = ""
90 - data_comp = str(num_nt+1)
91 - for b in a:
92 - num_nt += 1
93 -
94 -# sometimes in the nxpickled file, a node has the attribute "realnt",
95 - # and sometimes "real_nt", but it's the same thing
96 - try:
97 - seq += motif.nodes[b]["realnt"]
98 - except:
99 - seq += motif.nodes[b]["real_nt"]
100 - data_comp += "," + str(num_nt) + ";" + str(len(a)) + ";" + seq + "\n"
101 - f.write(data_comp)
102 -
103 - f.close()
104 - # print(str(i-1) + ".txt created")
105 -
106 - print("Successfully parsed "+filename, ", now individual RINs are saved in Subfiles/ folder.", sep='')
107 -
1 -# ============================ IMPORTS ====================================
2 -import subprocess
3 -import time
4 -import resource
5 -
6 -# take an RNA sequence and cut it into prefixes of increasing length, from 100 bases up to its actual length
7 -# then measure computation time, peak memory, and number of solutions for each length
8 -
9 -# This RNA is actually a 16S rRNA from PDB 1J5E.
10 -# http://ndbserver.rutgers.edu/service/ndb/atlas/summary
11 -seq = "UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU"
12 -
13 -step = 100
14 -n = len(seq)
15 -
16 -while step < len(seq)+50:
17 - sub_seq = seq[0:(min(step,n))]
18 -
19 - # write the sequence to file
20 - fasta = open("data/fasta/ZDFS33.fa", 'w')
21 - fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq)
22 - fasta.close()
23 -
24 - # run biorseo on it, with default options
25 - cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", "./ZDFS33.fa", "-v"]
26 - old_time = time.time()
27 - output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
28 - run_time = time.time() - old_time
29 - max_ram = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss
30 -
31 - for line in output :
32 - if "Quitting because combinatorial issues" in line :
33 - nb_sol = -1
34 - elif "solutions kept" in line :
35 - nb_sol = line.split(",")[1].split()[0]
36 -
37 - print(len(sub_seq), "first nucleotides :", nb_sol, "solutions in", run_time, "seconds, using", max_ram, "kb of RAM")
38 -
39 - step += 50
1 -#!/bin/bash
2 -
3 -echo "WARNING: The purpose of this file is to document how the docker image was built.";
4 -echo "You cannot execute it directly, because of licensing reasons. Please get your own:";
5 -echo "- CPLEX academic version: cplex_installer_12.8_Student.bin";
6 -echo "- Nupack header files: nupack_3.2.2.tar.gz";
7 -exit 0;
8 -
9 -cd ../
10 -THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
11 -
12 -####################################################### Dependencies ##############################################################
13 -sudo apt install -y clang-7 cmake make automake libboost-program-options-dev libboost-filesystem-dev openjdk-11-jre
14 -sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 100
15 -sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100
16 -
17 -# CPLEX: only to build biorseo
18 -# HERE YOU SHOULD GET YOUR OWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
19 -chmod +x cplex_installer_12.8_Student.bin
20 -printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin
21 -rm cplex_installer_12.8_Student.bin
22 -
23 -# Eigen: only to build biorseo (no need to give it to the docker image)
24 -wget http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz -O eigen_src.tar.gz
25 -tar -xf eigen_src.tar.gz
26 -cd eigen-eigen-323c052e1731
27 -mkdir build
28 -cd build
29 -cmake ..
30 -sudo make install
31 -cd ../..
32 -rm -rf eigen_src.tar.gz eigen-eigen-323c052e1731
33 -
34 -# Nupack: only to build biorseo (no need to give it to the docker image)
35 -#curl -u yourname@yourUni.com:yourPassword http://www.nupack.org/downloads/serve_file/nupack3.2.2.tar.gz --output nupack3.2.2.tar.gz
36 -tar -xf nupack3.2.2.tar.gz
37 -cd nupack3.2.2
38 -mkdir build
39 -cd build
40 -cmake ..
41 -make -j8
42 -sudo make install
43 -cd ../..
44 -sudo cp nupack3.2.2/src/thermo/*.h /usr/local/include/nupack/thermo/
45 -rm -rf nupack3.2.2.tar.gz nupack3.2.2/
46 -
47 -# BayesPairing: install on the docker image (done by the Dockerfile)
48 -git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
49 -
50 -######################################################### Build Biorseo ###########################################################
51 -# build here, install later on the docker image (done by the Dockerfile)
52 -mkdir -p results
53 -make -j 8
54 -make clean
55 -rm -rf doc/ obj/
56 -
57 -######################################################## Build Docker container ##################################################
58 -# Execute the Dockerfile and build the image
59 -docker build . -t biorseo
1 -
2 -#!/bin/bash
3 -######################################################## RNA modules ##############################################################
4 -
5 -cd ../
6 -
7 -# Rna3Dmotifs data
8 -mkdir -p data/modules/DESC
9 -wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz
10 -tar -xvzf CATALOGUE.tgz
11 -mv No_Redondance_DESC/*.desc data/modules/DESC/
12 -rm -r No_Redondance_VIEW3D No_Redondance_DESC CATALOGUE.tgz
13 -
14 -# The RNA 3D Motif Atlas
15 -mkdir -p data/modules/BGSU
16 -wget http://rna.bgsu.edu/data/jar3d/models/HL/HL_3.2_models.zip
17 -unzip HL_3.2_models.zip
18 -mv HL data/modules/BGSU
19 -rm HL_3.2_models.zip
20 -wget http://rna.bgsu.edu/data/jar3d/models/IL/IL_3.2_models.zip
21 -unzip IL_3.2_models.zip
22 -mv IL data/modules/BGSU
23 -rm IL_3.2_models.zip
24 -
25 -# Install BayesPairing
26 -sudo -H pip3 install --upgrade pip
27 -sudo -H pip3 install networkx numpy regex wrapt biopython
28 -git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
29 -cd BayesPairing
30 -sudo -H pip3 install .
31 -
32 -# Train BayesPairing (it has been installed on the image and the source has been deleted; we train the models now, and will remount it as a volume at run time)
33 -cd bayespairing/src
34 -python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
35 -python3 parse_sequences.py -d 3dmotifatlas -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
36 -cd ../../..
37 -
38 -######################################################## Run it ##############################################################
39 -
40 -# docker run -v `pwd`/data/modules:/modules -v `pwd`/BayesPairing/bayespairing:/byp -v `pwd`/results:/biorseo/results biorseo ./biorseo.py -i /biorseo/data/fasta/applications.fa --rna3dmotifs --patternmatch --func B
...\ No newline at end of file ...\ No newline at end of file
This diff is collapsed. Click to expand it.