Showing
6 changed files
with
71 additions
and
30 deletions
... | @@ -9,7 +9,7 @@ Contact : louis.becquey@univ-evry.fr | ... | @@ -9,7 +9,7 @@ Contact : louis.becquey@univ-evry.fr |
9 | 1/ How it works | 9 | 1/ How it works |
10 | =================================== | 10 | =================================== |
11 | INPUT: | 11 | INPUT: |
12 | -- An RNA sequence (tested with sequences ~100 bases) | 12 | +- An RNA sequence (with 16 GB of RAM you can go up to ~230 bases) |
13 | 13 | ||
14 | THEN | 14 | THEN |
15 | - **Pattern-matching step** : Find all possible occurrences of known RNAmodules in the query sequence, by finding subsequences of the querythat score well with the probabilistic models of the modules (like JAR3D, or BayesPairing) | 15 | - **Pattern-matching step** : Find all possible occurrences of known RNAmodules in the query sequence, by finding subsequences of the querythat score well with the probabilistic models of the modules (like JAR3D, or BayesPairing) | ... | ... |
... | @@ -21,19 +21,18 @@ bypdir = "" | ... | @@ -21,19 +21,18 @@ bypdir = "" |
21 | biorseoDir = "." | 21 | biorseoDir = "." |
22 | exec(compile(open(biorseoDir+"/EditMe").read(), '', 'exec')) | 22 | exec(compile(open(biorseoDir+"/EditMe").read(), '', 'exec')) |
23 | runDir = path.dirname(path.realpath(__file__)) | 23 | runDir = path.dirname(path.realpath(__file__)) |
24 | -tempDir = biorseoDir + "/temp/" | 24 | +modulespath = biorseoDir + "/data/modules" |
25 | -HLmotifDir = biorseoDir + "/data/modules/BGSU/HL/3.2/lib" | 25 | +HLmotifDir = modulespath + "/BGSU/HL/3.2/lib" |
26 | -ILmotifDir = biorseoDir + "/data/modules/BGSU/IL/3.2/lib" | 26 | +ILmotifDir = modulespath + "/BGSU/IL/3.2/lib" |
27 | -descfolder = biorseoDir + "/data/modules/DESC" | 27 | +descfolder = modulespath + "/DESC" |
28 | 28 | ||
29 | # Parse options | 29 | # Parse options |
30 | try: | 30 | try: |
31 | - opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func="]) | 31 | + opts, args = getopt.getopt(sys.argv[1:], "bc:f:hi:jl:no:pt:v", ["verbose", "rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func=","help","version","seq=","modules-path=", "first-objective=","output=","theta=","interrupt-limit="]) |
32 | -except getopt.GetoptError: | 32 | +except getopt.GetoptError as err: |
33 | - print("Please provide arguments !") | 33 | + print(err) |
34 | sys.exit(2) | 34 | sys.exit(2) |
35 | 35 | ||
36 | - | ||
37 | m = Manager() | 36 | m = Manager() |
38 | running_stats = m.list() | 37 | running_stats = m.list() |
39 | running_stats.append(0) # n_launched | 38 | running_stats.append(0) # n_launched |
... | @@ -335,38 +334,75 @@ class BiorseoInstance: | ... | @@ -335,38 +334,75 @@ class BiorseoInstance: |
335 | self.jobcount = 0 | 334 | self.jobcount = 0 |
336 | self.joblist = [] | 335 | self.joblist = [] |
337 | self.mode = 0 # default is single sequence mode | 336 | self.mode = 0 # default is single sequence mode |
337 | + self.forward_options = [] | ||
338 | 338 | ||
339 | for opt, arg in opts: | 339 | for opt, arg in opts: |
340 | - if opt == "-h": | 340 | + if opt == "-h" or opt == "--help": |
341 | - print("biorseo.py -i myRNA.fa -o myRNA.rawB --rna3dmotifs --patternmatch --func B") | 341 | + print( "Biorseo, Bi-Objective RNA Structure Efficient Optimizer\n" |
342 | - print("biorseo.py -i myRNA.fa -o myRNA.jar3dB --3dmotifatlas --jar3d --func B") | 342 | + "Bio-objective integer linear programming framework to predict RNA secondary structures by including known RNA modules.\n" |
343 | - print("biorseo.py -i myRNA.fa -o myRNA.bgsubypD --3dmotifatlas --bayespairing --func D") | 343 | + "Developped by Louis Becquey (louis.becquey@univ-evry.fr), 2019\n\n") |
344 | + print("Usage:\tYou must provide:\n\t1) a FASTA input file with -i,\n\t2) a module type with --rna3dmotifs or --3dmotifatlas" | ||
345 | + "\n\t3) one module placement method in { --patternmatch, --jar3d, --bayespairing }\n\t") | ||
346 | + print("Options:") | ||
347 | + print("-h [ --help ]\t\tPrint this help message") | ||
348 | + print("--version\t\t\tPrint the program version") | ||
349 | + print("-i [ --seq ]\t\tFASTA file with the query RNA sequence") | ||
350 | + print("-p [ --patternmatch ]\t\tUse regular expressions to place modules in the sequence") | ||
351 | + print("-j [ --jar3d ]\t\tUse JAR3D to place modules in the sequence (requires --3dmotifatlas)") | ||
352 | + print("-b [ --bayespairing ]\t\tUse BayesPairing to place modules in the sequence") | ||
353 | + print("-o [ --output ]\t\tFolder where to output files") | ||
354 | + print("-f [ --func ]\t\t(A, B, C or D, default is B)" | ||
355 | + "\t\t\t\tObjective function to score module insertions: (A) insert big modules (B) insert light, high-order modules" | ||
356 | + "\t\t\t\t(c) insert modules which score well with the sequence (D) insert light, high-order modules which score well with the sequence.") | ||
357 | + "\t\t\t\tC and D require cannot be used with --patternmatch.") | ||
358 | + | ||
359 | + print("biorseo.py -i myRNA.fa -o myResultsFolder/ --rna3dmotifs --patternmatch --func B") | ||
360 | + print("biorseo.py -i myRNA.fa -o myResultsFolder/ --3dmotifatlas --jar3d --func B") | ||
361 | + print("biorseo.py -i myRNA.fa --3dmotifatlas --bayespairing --func D") | ||
344 | sys.exit() | 362 | sys.exit() |
345 | - elif opt == "-i": | 363 | + elif opt == "-i" or opt == "--seq": |
346 | self.inputfile = arg | 364 | self.inputfile = arg |
347 | - elif opt == "-o": | 365 | + elif opt == "-o" or opt == "--output": |
348 | self.outputf = arg # output file or folder... | 366 | self.outputf = arg # output file or folder... |
349 | if self.outputf[1] != '/': | 367 | if self.outputf[1] != '/': |
350 | self.outputf = getcwd() + '/' + self.outputf | 368 | self.outputf = getcwd() + '/' + self.outputf |
351 | if self.outputf[-1] != '/': | 369 | if self.outputf[-1] != '/': |
352 | self.outputf = self.outputf + '/' | 370 | self.outputf = self.outputf + '/' |
353 | - elif opt == "--func": | 371 | + elif opt == "-f" or opt == "--func": |
354 | if arg in ['A', 'B', 'C', 'D']: | 372 | if arg in ['A', 'B', 'C', 'D']: |
355 | self.func = arg | 373 | self.func = arg |
356 | else: | 374 | else: |
357 | raise "Unknown scoring function " + arg | 375 | raise "Unknown scoring function " + arg |
358 | - elif opt == "--patternmatch": | 376 | + elif opt == "-p" or opt == "--patternmatch": |
359 | self.type = "dpm" | 377 | self.type = "dpm" |
360 | - elif opt == "--jar3d": | 378 | + elif opt == "-j" or opt == "--jar3d": |
361 | self.type = "jar3d" | 379 | self.type = "jar3d" |
362 | - elif opt == "--bayespairing": | 380 | + elif opt == "-b" or opt == "--bayespairing": |
363 | self.type = "byp" | 381 | self.type = "byp" |
364 | elif opt == "--rna3dmotifs": | 382 | elif opt == "--rna3dmotifs": |
365 | self.modules = "desc" | 383 | self.modules = "desc" |
366 | elif opt == "--3dmotifatlas": | 384 | elif opt == "--3dmotifatlas": |
367 | self.modules = "bgsu" | 385 | self.modules = "bgsu" |
368 | - else: | 386 | + elif opt == "--modulespath": |
369 | - raise "Unknown option " + opt | 387 | + HLmotifDir = arg + "/HL/3.2/lib" |
388 | + ILmotifDir = arg + "/IL/3.2/lib" | ||
389 | + descfolder = arg | ||
390 | + elif opt == "--version": | ||
391 | + subprocess.call([biorseoDir+"/bin/biorseo", "--version"]) | ||
392 | + exit(0) | ||
393 | + elif opt == "-l" or opt == "--interrupt-limit": | ||
394 | + self.forward_options.append("-l") | ||
395 | + self.forward_options.append(arg) | ||
396 | + elif opt == "-v" or opt == "--verbose": | ||
397 | + self.forward_options.append("-v") | ||
398 | + elif opt == "-n" or opt == "--disable-pseudoknots": | ||
399 | + self.forward_options.append("-n") | ||
400 | + elif opt == "-t" or opt == "--theta": | ||
401 | + self.forward_options.append("-t") | ||
402 | + self.forward_options.append(arg) | ||
403 | + elif opt == "-c" or opt == "--first-objective": | ||
404 | + self.forward_options.append("-c") | ||
405 | + self.forward_options.append(arg) | ||
370 | 406 | ||
371 | print("saving files to", self.outputf) | 407 | print("saving files to", self.outputf) |
372 | # create jobs | 408 | # create jobs |
... | @@ -793,7 +829,8 @@ class BiorseoInstance: | ... | @@ -793,7 +829,8 @@ class BiorseoInstance: |
793 | command = [executable, "-s", fastafile ] | 829 | command = [executable, "-s", fastafile ] |
794 | if method_type: | 830 | if method_type: |
795 | command += [ method_type, csv ] | 831 | command += [ method_type, csv ] |
796 | - command += [ "-o", self.outputf + instance.header + ext + self.func, "--type", self.func ] | 832 | + command += [ "-o", self.outputf + instance.header + ext + self.func, "--function", self.func ] |
833 | + command += self.forward_options | ||
797 | self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3)) | 834 | self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3)) |
798 | 835 | ||
799 | 836 | ... | ... |
... | @@ -21,6 +21,7 @@ char MOIP::obj_function_nbr_ = 'A'; | ... | @@ -21,6 +21,7 @@ char MOIP::obj_function_nbr_ = 'A'; |
21 | uint MOIP::obj_to_solve_ = 1; | 21 | uint MOIP::obj_to_solve_ = 1; |
22 | double MOIP::precision_ = 1e-5; | 22 | double MOIP::precision_ = 1e-5; |
23 | bool MOIP::allow_pk_ = true; | 23 | bool MOIP::allow_pk_ = true; |
24 | +uint MOIP::max_sol_nbr_ = 500; | ||
24 | 25 | ||
25 | unsigned getNumConstraints(IloModel& m) | 26 | unsigned getNumConstraints(IloModel& m) |
26 | { | 27 | { |
... | @@ -499,8 +500,8 @@ void MOIP::add_solution(const SecondaryStructure& s) | ... | @@ -499,8 +500,8 @@ void MOIP::add_solution(const SecondaryStructure& s) |
499 | { | 500 | { |
500 | if (verbose_) cout << "\t>adding structure to Pareto set :\t" << s.to_string() << endl; | 501 | if (verbose_) cout << "\t>adding structure to Pareto set :\t" << s.to_string() << endl; |
501 | pareto_.push_back(s); | 502 | pareto_.push_back(s); |
502 | - if (pareto_.size() > 500) { | 503 | + if (pareto_.size() > max_sol_nbr_) { |
503 | - cerr << "\033[31m Quitting because combinatorial issues (>500 solutions in Pareto set). \033[0m" << endl; | 504 | + cerr << "\033[31m Quitting because combinatorial issues (>" << max_sol_nbr_ << " solutions in Pareto set). \033[0m" << endl; |
504 | exit(1); | 505 | exit(1); |
505 | } | 506 | } |
506 | } | 507 | } | ... | ... |
... | @@ -30,6 +30,7 @@ class MOIP | ... | @@ -30,6 +30,7 @@ class MOIP |
30 | static uint obj_to_solve_; // What objective do you prefer to solve in mono-objective portions of the algorithm ? | 30 | static uint obj_to_solve_; // What objective do you prefer to solve in mono-objective portions of the algorithm ? |
31 | static double precision_; // decimals to keep in objective values, to avoid numerical issues. otherwise, solution with objective 5.0000000009 dominates solution with 5.0 =( | 31 | static double precision_; // decimals to keep in objective values, to avoid numerical issues. otherwise, solution with objective 5.0000000009 dominates solution with 5.0 =( |
32 | static bool allow_pk_; // Wether we forbid pseudoknots (false) or allow them (true) | 32 | static bool allow_pk_; // Wether we forbid pseudoknots (false) or allow them (true) |
33 | + static uint max_sol_nbr_; // Number of solutions to accept in the Pareto set before we give up the computation | ||
33 | 34 | ||
34 | private: | 35 | private: |
35 | bool is_undominated_yet(const SecondaryStructure& s); | 36 | bool is_undominated_yet(const SecondaryStructure& s); | ... | ... |
... | @@ -73,14 +73,15 @@ int main(int argc, char* argv[]) | ... | @@ -73,14 +73,15 @@ int main(int argc, char* argv[]) |
73 | ("version", "Print the program version") | 73 | ("version", "Print the program version") |
74 | ("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence") | 74 | ("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence") |
75 | ("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program") | 75 | ("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program") |
76 | - ("jar3dcsv", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py") | 76 | + ("jar3dcsv,j", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py") |
77 | - ("bayespaircsv", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py") | 77 | + ("bayespaircsv,b", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py") |
78 | ("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(1), "Objective to solve in the mono-objective portions of the algorithm") | 78 | ("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(1), "Objective to solve in the mono-objective portions of the algorithm") |
79 | ("output,o", po::value<string>(&outputName), "A file to summarize the computation results") | 79 | ("output,o", po::value<string>(&outputName), "A file to summarize the computation results") |
80 | ("theta,t", po::value<float>(&theta_p_threshold)->default_value(0.001), "Pairing probability threshold to consider or not the possibility of pairing") | 80 | ("theta,t", po::value<float>(&theta_p_threshold)->default_value(0.001), "Pairing probability threshold to consider or not the possibility of pairing") |
81 | - ("type,f", po::value<char>(&obj_function_nbr)->default_value('A'), "What objective function to use to include motifs: square of motif size in nucleotides like " | 81 | + ("function,f", po::value<char>(&obj_function_nbr)->default_value('B'), "What objective function to use to include motifs: square of motif size in nucleotides like " |
82 | - "RNA-MoIP (A), motif size + number of components (B), site score (C), motif size + site score + number of components (D)") | 82 | + "RNA-MoIP (A), light motif size + high number of components (B), site score (C), light motif size + site score + high number of components (D)") |
83 | ("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots") | 83 | ("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots") |
84 | + ("limit,l", po::value<unsigned int>(&MOIP::max_sol_nbr_)->default_value(500), "Intermediate number of solutions in the Pareto set above which we give up the calculation.") | ||
84 | ("verbose,v", "Print what is happening to stdout"); | 85 | ("verbose,v", "Print what is happening to stdout"); |
85 | po::variables_map vm; | 86 | po::variables_map vm; |
86 | po::store(po::parse_command_line(argc, argv, desc), vm); | 87 | po::store(po::parse_command_line(argc, argv, desc), vm); |
... | @@ -99,7 +100,7 @@ int main(int argc, char* argv[]) | ... | @@ -99,7 +100,7 @@ int main(int argc, char* argv[]) |
99 | return EXIT_SUCCESS; | 100 | return EXIT_SUCCESS; |
100 | } | 101 | } |
101 | if (vm.count("version")) { | 102 | if (vm.count("version")) { |
102 | - cout << "Biorseo v1.0, May 2019" << endl; | 103 | + cout << "Biorseo v1.01, June 2019" << endl; |
103 | return EXIT_SUCCESS; | 104 | return EXIT_SUCCESS; |
104 | } | 105 | } |
105 | if (vm.count("verbose")) verbose = true; | 106 | if (vm.count("verbose")) verbose = true; |
... | @@ -112,7 +113,7 @@ int main(int argc, char* argv[]) | ... | @@ -112,7 +113,7 @@ int main(int argc, char* argv[]) |
112 | return EXIT_FAILURE; | 113 | return EXIT_FAILURE; |
113 | } | 114 | } |
114 | if (vm.count("-d") and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) { | 115 | if (vm.count("-d") and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) { |
115 | - cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --type C or --type D.\033[0m See " | 116 | + cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --function C or --function D.\033[0m See " |
116 | "--help for more " | 117 | "--help for more " |
117 | "information." | 118 | "information." |
118 | << endl; | 119 | << endl; | ... | ... |
-
Please register or login to post a comment