Full help in command line

1/ How it works
- An RNA sequence (tested with sequences ~100 bases)
- An RNA sequence (with 16 GB of RAM you can go up to ~230 bases)
- **Pattern-matching step** : Find all possible occurrences of known RNA modules in the query sequence, by finding subsequences of the query that score well with the probabilistic models of the modules (like JAR3D, or BayesPairing)
# Parse options
opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func="])
except getopt.GetoptError:
print("Please provide arguments !")
opts, args = getopt.getopt(sys.argv[1:], "bc:f:hi:jl:no:pt:v", ["verbose", "rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func=","help","version","seq=","modules-path=", "first-objective=","output=","theta=","interrupt-limit="])
except getopt.GetoptError as err:
m = Manager()
running_stats = m.list()
running_stats.append(0) # n_launched
self.jobcount = 0
self.joblist = []
self.mode = 0 # default is single sequence mode
self.forward_options = []
for opt, arg in opts:
if opt == "-h" or opt == "--help":
print( "Biorseo, Bi-Objective RNA Structure Efficient Optimizer\n"
"Bio-objective integer linear programming framework to predict RNA secondary structures by including known RNA modules.\n"
"Developped by Louis Becquey (louis.becquey@univ-evry.fr), 2019\n\n")
print("Usage:\tYou must provide:\n\t1) a FASTA input file with -i,\n\t2) a module type with --rna3dmotifs or --3dmotifatlas"
"\n\t3) one module placement method in { --patternmatch, --jar3d, --bayespairing }\n\t")
print("-h [ --help ]\t\tPrint this help message")
print("--version\t\t\tPrint the program version")
print("-i [ --seq ]\t\tFASTA file with the query RNA sequence")
print("-p [ --patternmatch ]\t\tUse regular expressions to place modules in the sequence")
print("-j [ --jar3d ]\t\tUse JAR3D to place modules in the sequence (requires --3dmotifatlas)")
print("-b [ --bayespairing ]\t\tUse BayesPairing to place modules in the sequence")
print("-o [ --output ]\t\tFolder where to output files")
print("-f [ --func ]\t\t(A, B, C or D, default is B)"
"\t\t\t\tObjective function to score module insertions: (A) insert big modules (B) insert light, high-order modules"
"\t\t\t\t(c) insert modules which score well with the sequence (D) insert light, high-order modules which score well with the sequence.")
"\t\t\t\tC and D require cannot be used with --patternmatch.")
print("biorseo.py -i myRNA.fa -o myResultsFolder/ --rna3dmotifs --patternmatch --func B")
print("biorseo.py -i myRNA.fa -o myResultsFolder/ --3dmotifatlas --jar3d --func B")
print("biorseo.py -i myRNA.fa --3dmotifatlas --bayespairing --func D")
elif opt == "-i":
elif opt == "-i" or opt == "--seq":
self.inputfile = arg
elif opt == "-o":
elif opt == "-o" or opt == "--output":
self.outputf = arg # output file or folder...
if self.outputf[1] != '/':
self.outputf = getcwd() + '/' + self.outputf
if self.outputf[-1] != '/':
self.outputf = self.outputf + '/'
elif opt == "--func":
elif opt == "-f" or opt == "--func":
if arg in ['A', 'B', 'C', 'D']:
self.func = arg
raise "Unknown scoring function " + arg
elif opt == "--patternmatch":
elif opt == "-p" or opt == "--patternmatch":
self.type = "dpm"
elif opt == "--jar3d":
elif opt == "-j" or opt == "--jar3d":
self.type = "jar3d"
elif opt == "--bayespairing":
elif opt == "-b" or opt == "--bayespairing":
self.type = "byp"
elif opt == "--rna3dmotifs":
self.modules = "desc"
elif opt == "--3dmotifatlas":
self.modules = "bgsu"
raise "Unknown option " + opt
elif opt == "--modulespath":
HLmotifDir = arg + "/HL/3.2/lib"
ILmotifDir = arg + "/IL/3.2/lib"
descfolder = arg
elif opt == "--version":
subprocess.call([biorseoDir+"/bin/biorseo", "--version"])
elif opt == "-l" or opt == "--interrupt-limit":
elif opt == "-v" or opt == "--verbose":
elif opt == "-n" or opt == "--disable-pseudoknots":
elif opt == "-t" or opt == "--theta":
elif opt == "-c" or opt == "--first-objective":
print("saving files to", self.outputf)
# create jobs
command = [executable, "-s", fastafile ]
if method_type:
command += [ method_type, csv ]
command += [ "-o", self.outputf + instance.header + ext + self.func, "--type", self.func ]
command += [ "-o", self.outputf + instance.header + ext + self.func, "--function", self.func ]
command += self.forward_options
self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3))
// Initial values of MOIP's static configuration members.
uint MOIP::obj_to_solve_ = 1; // objective preferred in the mono-objective portions of the algorithm
double MOIP::precision_ = 1e-5; // precision kept on objective values, to avoid numerical issues when comparing solutions
bool MOIP::allow_pk_ = true; // true: pseudoknots allowed, false: pseudoknots forbidden
uint MOIP::max_sol_nbr_ = 500; // number of solutions accepted in the Pareto set before giving up the computation
unsigned getNumConstraints(IloModel& m)
......@@ -499,8 +500,8 @@ void MOIP::add_solution(const SecondaryStructure& s)
if (verbose_) cout << "\t>adding structure to Pareto set :\t" << s.to_string() << endl;
if (pareto_.size() > 500) {
cerr << "\033[31m Quitting because combinatorial issues (>500 solutions in Pareto set). \033[0m" << endl;
if (pareto_.size() > max_sol_nbr_) {
cerr << "\033[31m Quitting because combinatorial issues (>" << max_sol_nbr_ << " solutions in Pareto set). \033[0m" << endl;
static uint obj_to_solve_; // Which objective to solve in the mono-objective portions of the algorithm
static double precision_; // Decimals to keep in objective values, to avoid numerical issues. Otherwise, a solution with objective 5.0000000009 dominates a solution with 5.0
static bool allow_pk_; // Whether we forbid pseudoknots (false) or allow them (true)
static uint max_sol_nbr_; // Number of solutions to accept in the Pareto set before we give up the computation
bool is_undominated_yet(const SecondaryStructure& s);
void define_problem_constraints(void);
("version", "Print the program version")
("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence")
("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program")
("jar3dcsv", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("bayespaircsv", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("jar3dcsv,j", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("bayespaircsv,b", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(1), "Objective to solve in the mono-objective portions of the algorithm")
("output,o", po::value<string>(&outputName), "A file to summarize the computation results")
("theta,t", po::value<float>(&theta_p_threshold)->default_value(0.001), "Pairing probability threshold to consider or not the possibility of pairing")
("type,f", po::value<char>(&obj_function_nbr)->default_value('A'), "What objective function to use to include motifs: square of motif size in nucleotides like "
"RNA-MoIP (A), motif size + number of components (B), site score (C), motif size + site score + number of components (D)")
("function,f", po::value<char>(&obj_function_nbr)->default_value('B'), "What objective function to use to include motifs: square of motif size in nucleotides like "
"RNA-MoIP (A), light motif size + high number of components (B), site score (C), light motif size + site score + high number of components (D)")
("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots")
("limit,l", po::value<unsigned int>(&MOIP::max_sol_nbr_)->default_value(500), "Intermediate number of solutions in the Pareto set above which we give up the calculation.")
("verbose,v", "Print what is happening to stdout");
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
if (vm.count("version")) {
cout << "Biorseo v1.0, May 2019" << endl;
cout << "Biorseo v1.01, June 2019" << endl;
if (vm.count("verbose")) verbose = true;
if (vm.count("-d") and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) {
cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --type C or --type D.\033[0m See "
cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --function C or --function D.\033[0m See "
"--help for more "
<< endl;