Louis BECQUEY

Full help in command line

...@@ -19,6 +19,7 @@ bin/* ...@@ -19,6 +19,7 @@ bin/*
19 19
20 # results 20 # results
21 results/* 21 results/*
22 +benchmark_results/*
22 log_of_the_run.sh 23 log_of_the_run.sh
23 logBadDesc.txt 24 logBadDesc.txt
24 gurobi.log 25 gurobi.log
......
...@@ -9,7 +9,7 @@ Contact : louis.becquey@univ-evry.fr ...@@ -9,7 +9,7 @@ Contact : louis.becquey@univ-evry.fr
9 1/ How it works 9 1/ How it works
10 =================================== 10 ===================================
11 INPUT: 11 INPUT:
12 -- An RNA sequence (tested with sequences ~100 bases) 12 +- An RNA sequence (with 16 GB of RAM you can go up to ~230 bases)
13 13
14 THEN 14 THEN
15 - **Pattern-matching step** : Find all possible occurrences of known RNAmodules in the query sequence, by finding subsequences of the querythat score well with the probabilistic models of the modules (like JAR3D, or BayesPairing) 15 - **Pattern-matching step** : Find all possible occurrences of known RNAmodules in the query sequence, by finding subsequences of the querythat score well with the probabilistic models of the modules (like JAR3D, or BayesPairing)
......
...@@ -21,19 +21,18 @@ bypdir = "" ...@@ -21,19 +21,18 @@ bypdir = ""
21 biorseoDir = "." 21 biorseoDir = "."
22 exec(compile(open(biorseoDir+"/EditMe").read(), '', 'exec')) 22 exec(compile(open(biorseoDir+"/EditMe").read(), '', 'exec'))
23 runDir = path.dirname(path.realpath(__file__)) 23 runDir = path.dirname(path.realpath(__file__))
24 -tempDir = biorseoDir + "/temp/" 24 +modulespath = biorseoDir + "/data/modules"
25 -HLmotifDir = biorseoDir + "/data/modules/BGSU/HL/3.2/lib" 25 +HLmotifDir = modulespath + "/BGSU/HL/3.2/lib"
26 -ILmotifDir = biorseoDir + "/data/modules/BGSU/IL/3.2/lib" 26 +ILmotifDir = modulespath + "/BGSU/IL/3.2/lib"
27 -descfolder = biorseoDir + "/data/modules/DESC" 27 +descfolder = modulespath + "/DESC"
28 28
29 # Parse options 29 # Parse options
30 try: 30 try:
31 - opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func="]) 31 + opts, args = getopt.getopt(sys.argv[1:], "bc:f:hi:jl:no:pt:v", ["verbose", "rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func=","help","version","seq=","modules-path=", "first-objective=","output=","theta=","interrupt-limit="])
32 -except getopt.GetoptError: 32 +except getopt.GetoptError as err:
33 - print("Please provide arguments !") 33 + print(err)
34 sys.exit(2) 34 sys.exit(2)
35 35
36 -
37 m = Manager() 36 m = Manager()
38 running_stats = m.list() 37 running_stats = m.list()
39 running_stats.append(0) # n_launched 38 running_stats.append(0) # n_launched
...@@ -335,38 +334,75 @@ class BiorseoInstance: ...@@ -335,38 +334,75 @@ class BiorseoInstance:
335 self.jobcount = 0 334 self.jobcount = 0
336 self.joblist = [] 335 self.joblist = []
337 self.mode = 0 # default is single sequence mode 336 self.mode = 0 # default is single sequence mode
337 + self.forward_options = []
338 338
339 for opt, arg in opts: 339 for opt, arg in opts:
340 - if opt == "-h": 340 + if opt == "-h" or opt == "--help":
341 - print("biorseo.py -i myRNA.fa -o myRNA.rawB --rna3dmotifs --patternmatch --func B") 341 + print( "Biorseo, Bi-Objective RNA Structure Efficient Optimizer\n"
342 - print("biorseo.py -i myRNA.fa -o myRNA.jar3dB --3dmotifatlas --jar3d --func B") 342 + "Bio-objective integer linear programming framework to predict RNA secondary structures by including known RNA modules.\n"
343 - print("biorseo.py -i myRNA.fa -o myRNA.bgsubypD --3dmotifatlas --bayespairing --func D") 343 + "Developped by Louis Becquey (louis.becquey@univ-evry.fr), 2019\n\n")
344 + print("Usage:\tYou must provide:\n\t1) a FASTA input file with -i,\n\t2) a module type with --rna3dmotifs or --3dmotifatlas"
345 + "\n\t3) one module placement method in { --patternmatch, --jar3d, --bayespairing }\n\t")
346 + print("Options:")
347 + print("-h [ --help ]\t\tPrint this help message")
348 + print("--version\t\t\tPrint the program version")
349 + print("-i [ --seq ]\t\tFASTA file with the query RNA sequence")
350 + print("-p [ --patternmatch ]\t\tUse regular expressions to place modules in the sequence")
351 + print("-j [ --jar3d ]\t\tUse JAR3D to place modules in the sequence (requires --3dmotifatlas)")
352 + print("-b [ --bayespairing ]\t\tUse BayesPairing to place modules in the sequence")
353 + print("-o [ --output ]\t\tFolder where to output files")
354 + print("-f [ --func ]\t\t(A, B, C or D, default is B)"
355 + "\t\t\t\tObjective function to score module insertions: (A) insert big modules (B) insert light, high-order modules"
356 + "\t\t\t\t(c) insert modules which score well with the sequence (D) insert light, high-order modules which score well with the sequence.")
357 + "\t\t\t\tC and D require cannot be used with --patternmatch.")
358 +
359 + print("biorseo.py -i myRNA.fa -o myResultsFolder/ --rna3dmotifs --patternmatch --func B")
360 + print("biorseo.py -i myRNA.fa -o myResultsFolder/ --3dmotifatlas --jar3d --func B")
361 + print("biorseo.py -i myRNA.fa --3dmotifatlas --bayespairing --func D")
344 sys.exit() 362 sys.exit()
345 - elif opt == "-i": 363 + elif opt == "-i" or opt == "--seq":
346 self.inputfile = arg 364 self.inputfile = arg
347 - elif opt == "-o": 365 + elif opt == "-o" or opt == "--output":
348 self.outputf = arg # output file or folder... 366 self.outputf = arg # output file or folder...
349 if self.outputf[1] != '/': 367 if self.outputf[1] != '/':
350 self.outputf = getcwd() + '/' + self.outputf 368 self.outputf = getcwd() + '/' + self.outputf
351 if self.outputf[-1] != '/': 369 if self.outputf[-1] != '/':
352 self.outputf = self.outputf + '/' 370 self.outputf = self.outputf + '/'
353 - elif opt == "--func": 371 + elif opt == "-f" or opt == "--func":
354 if arg in ['A', 'B', 'C', 'D']: 372 if arg in ['A', 'B', 'C', 'D']:
355 self.func = arg 373 self.func = arg
356 else: 374 else:
357 raise "Unknown scoring function " + arg 375 raise "Unknown scoring function " + arg
358 - elif opt == "--patternmatch": 376 + elif opt == "-p" or opt == "--patternmatch":
359 self.type = "dpm" 377 self.type = "dpm"
360 - elif opt == "--jar3d": 378 + elif opt == "-j" or opt == "--jar3d":
361 self.type = "jar3d" 379 self.type = "jar3d"
362 - elif opt == "--bayespairing": 380 + elif opt == "-b" or opt == "--bayespairing":
363 self.type = "byp" 381 self.type = "byp"
364 elif opt == "--rna3dmotifs": 382 elif opt == "--rna3dmotifs":
365 self.modules = "desc" 383 self.modules = "desc"
366 elif opt == "--3dmotifatlas": 384 elif opt == "--3dmotifatlas":
367 self.modules = "bgsu" 385 self.modules = "bgsu"
368 - else: 386 + elif opt == "--modulespath":
369 - raise "Unknown option " + opt 387 + HLmotifDir = arg + "/HL/3.2/lib"
388 + ILmotifDir = arg + "/IL/3.2/lib"
389 + descfolder = arg
390 + elif opt == "--version":
391 + subprocess.call([biorseoDir+"/bin/biorseo", "--version"])
392 + exit(0)
393 + elif opt == "-l" or opt == "--interrupt-limit":
394 + self.forward_options.append("-l")
395 + self.forward_options.append(arg)
396 + elif opt == "-v" or opt == "--verbose":
397 + self.forward_options.append("-v")
398 + elif opt == "-n" or opt == "--disable-pseudoknots":
399 + self.forward_options.append("-n")
400 + elif opt == "-t" or opt == "--theta":
401 + self.forward_options.append("-t")
402 + self.forward_options.append(arg)
403 + elif opt == "-c" or opt == "--first-objective":
404 + self.forward_options.append("-c")
405 + self.forward_options.append(arg)
370 406
371 print("saving files to", self.outputf) 407 print("saving files to", self.outputf)
372 # create jobs 408 # create jobs
...@@ -793,7 +829,8 @@ class BiorseoInstance: ...@@ -793,7 +829,8 @@ class BiorseoInstance:
793 command = [executable, "-s", fastafile ] 829 command = [executable, "-s", fastafile ]
794 if method_type: 830 if method_type:
795 command += [ method_type, csv ] 831 command += [ method_type, csv ]
796 - command += [ "-o", self.outputf + instance.header + ext + self.func, "--type", self.func ] 832 + command += [ "-o", self.outputf + instance.header + ext + self.func, "--function", self.func ]
833 + command += self.forward_options
797 self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3)) 834 self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3))
798 835
799 836
......
...@@ -21,6 +21,7 @@ char MOIP::obj_function_nbr_ = 'A'; ...@@ -21,6 +21,7 @@ char MOIP::obj_function_nbr_ = 'A';
21 uint MOIP::obj_to_solve_ = 1; 21 uint MOIP::obj_to_solve_ = 1;
22 double MOIP::precision_ = 1e-5; 22 double MOIP::precision_ = 1e-5;
23 bool MOIP::allow_pk_ = true; 23 bool MOIP::allow_pk_ = true;
24 +uint MOIP::max_sol_nbr_ = 500;
24 25
25 unsigned getNumConstraints(IloModel& m) 26 unsigned getNumConstraints(IloModel& m)
26 { 27 {
...@@ -499,8 +500,8 @@ void MOIP::add_solution(const SecondaryStructure& s) ...@@ -499,8 +500,8 @@ void MOIP::add_solution(const SecondaryStructure& s)
499 { 500 {
500 if (verbose_) cout << "\t>adding structure to Pareto set :\t" << s.to_string() << endl; 501 if (verbose_) cout << "\t>adding structure to Pareto set :\t" << s.to_string() << endl;
501 pareto_.push_back(s); 502 pareto_.push_back(s);
502 - if (pareto_.size() > 500) { 503 + if (pareto_.size() > max_sol_nbr_) {
503 - cerr << "\033[31m Quitting because combinatorial issues (>500 solutions in Pareto set). \033[0m" << endl; 504 + cerr << "\033[31m Quitting because combinatorial issues (>" << max_sol_nbr_ << " solutions in Pareto set). \033[0m" << endl;
504 exit(1); 505 exit(1);
505 } 506 }
506 } 507 }
......
...@@ -30,7 +30,8 @@ class MOIP ...@@ -30,7 +30,8 @@ class MOIP
30 static uint obj_to_solve_; // What objective do you prefer to solve in mono-objective portions of the algorithm ? 30 static uint obj_to_solve_; // What objective do you prefer to solve in mono-objective portions of the algorithm ?
31 static double precision_; // decimals to keep in objective values, to avoid numerical issues. otherwise, solution with objective 5.0000000009 dominates solution with 5.0 =( 31 static double precision_; // decimals to keep in objective values, to avoid numerical issues. otherwise, solution with objective 5.0000000009 dominates solution with 5.0 =(
32 static bool allow_pk_; // Wether we forbid pseudoknots (false) or allow them (true) 32 static bool allow_pk_; // Wether we forbid pseudoknots (false) or allow them (true)
33 - 33 + static uint max_sol_nbr_; // Number of solutions to accept in the Pareto set before we give up the computation
34 +
34 private: 35 private:
35 bool is_undominated_yet(const SecondaryStructure& s); 36 bool is_undominated_yet(const SecondaryStructure& s);
36 void define_problem_constraints(void); 37 void define_problem_constraints(void);
......
...@@ -73,14 +73,15 @@ int main(int argc, char* argv[]) ...@@ -73,14 +73,15 @@ int main(int argc, char* argv[])
73 ("version", "Print the program version") 73 ("version", "Print the program version")
74 ("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence") 74 ("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence")
75 ("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program") 75 ("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program")
76 - ("jar3dcsv", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py") 76 + ("jar3dcsv,j", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
77 - ("bayespaircsv", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py") 77 + ("bayespaircsv,b", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
78 ("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(1), "Objective to solve in the mono-objective portions of the algorithm") 78 ("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(1), "Objective to solve in the mono-objective portions of the algorithm")
79 ("output,o", po::value<string>(&outputName), "A file to summarize the computation results") 79 ("output,o", po::value<string>(&outputName), "A file to summarize the computation results")
80 ("theta,t", po::value<float>(&theta_p_threshold)->default_value(0.001), "Pairing probability threshold to consider or not the possibility of pairing") 80 ("theta,t", po::value<float>(&theta_p_threshold)->default_value(0.001), "Pairing probability threshold to consider or not the possibility of pairing")
81 - ("type,f", po::value<char>(&obj_function_nbr)->default_value('A'), "What objective function to use to include motifs: square of motif size in nucleotides like " 81 + ("function,f", po::value<char>(&obj_function_nbr)->default_value('B'), "What objective function to use to include motifs: square of motif size in nucleotides like "
82 - "RNA-MoIP (A), motif size + number of components (B), site score (C), motif size + site score + number of components (D)") 82 + "RNA-MoIP (A), light motif size + high number of components (B), site score (C), light motif size + site score + high number of components (D)")
83 ("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots") 83 ("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots")
84 + ("limit,l", po::value<unsigned int>(&MOIP::max_sol_nbr_)->default_value(500), "Intermediate number of solutions in the Pareto set above which we give up the calculation.")
84 ("verbose,v", "Print what is happening to stdout"); 85 ("verbose,v", "Print what is happening to stdout");
85 po::variables_map vm; 86 po::variables_map vm;
86 po::store(po::parse_command_line(argc, argv, desc), vm); 87 po::store(po::parse_command_line(argc, argv, desc), vm);
...@@ -99,7 +100,7 @@ int main(int argc, char* argv[]) ...@@ -99,7 +100,7 @@ int main(int argc, char* argv[])
99 return EXIT_SUCCESS; 100 return EXIT_SUCCESS;
100 } 101 }
101 if (vm.count("version")) { 102 if (vm.count("version")) {
102 - cout << "Biorseo v1.0, May 2019" << endl; 103 + cout << "Biorseo v1.01, June 2019" << endl;
103 return EXIT_SUCCESS; 104 return EXIT_SUCCESS;
104 } 105 }
105 if (vm.count("verbose")) verbose = true; 106 if (vm.count("verbose")) verbose = true;
...@@ -112,7 +113,7 @@ int main(int argc, char* argv[]) ...@@ -112,7 +113,7 @@ int main(int argc, char* argv[])
112 return EXIT_FAILURE; 113 return EXIT_FAILURE;
113 } 114 }
114 if (vm.count("-d") and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) { 115 if (vm.count("-d") and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) {
115 - cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --type C or --type D.\033[0m See " 116 + cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --function C or --function D.\033[0m See "
116 "--help for more " 117 "--help for more "
117 "information." 118 "information."
118 << endl; 119 << endl;
......