Louis BECQUEY

Full help in command line

......@@ -19,6 +19,7 @@ bin/*
# results
results/*
benchmark_results/*
log_of_the_run.sh
logBadDesc.txt
gurobi.log
......
......@@ -9,7 +9,7 @@ Contact : louis.becquey@univ-evry.fr
1/ How it works
===================================
INPUT:
- An RNA sequence (tested with sequences ~100 bases)
- An RNA sequence (with 16 GB of RAM you can go up to ~230 bases)
THEN
- **Pattern-matching step** : Find all possible occurrences of known RNA modules in the query sequence, by finding subsequences of the query that score well with the probabilistic models of the modules (like JAR3D, or BayesPairing)
......
......@@ -21,19 +21,18 @@ bypdir = ""
biorseoDir = "."
exec(compile(open(biorseoDir+"/EditMe").read(), '', 'exec'))
runDir = path.dirname(path.realpath(__file__))
tempDir = biorseoDir + "/temp/"
HLmotifDir = biorseoDir + "/data/modules/BGSU/HL/3.2/lib"
ILmotifDir = biorseoDir + "/data/modules/BGSU/IL/3.2/lib"
descfolder = biorseoDir + "/data/modules/DESC"
modulespath = biorseoDir + "/data/modules"
HLmotifDir = modulespath + "/BGSU/HL/3.2/lib"
ILmotifDir = modulespath + "/BGSU/IL/3.2/lib"
descfolder = modulespath + "/DESC"
# Parse options
try:
opts, args = getopt.getopt(sys.argv[1:], "hi:o:", ["rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func="])
except getopt.GetoptError:
print("Please provide arguments !")
opts, args = getopt.getopt(sys.argv[1:], "bc:f:hi:jl:no:pt:v", ["verbose", "rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func=","help","version","seq=","modules-path=", "first-objective=","output=","theta=","interrupt-limit="])
except getopt.GetoptError as err:
print(err)
sys.exit(2)
m = Manager()
running_stats = m.list()
running_stats.append(0) # n_launched
......@@ -335,38 +334,75 @@ class BiorseoInstance:
self.jobcount = 0
self.joblist = []
self.mode = 0 # default is single sequence mode
self.forward_options = []
for opt, arg in opts:
if opt == "-h":
print("biorseo.py -i myRNA.fa -o myRNA.rawB --rna3dmotifs --patternmatch --func B")
print("biorseo.py -i myRNA.fa -o myRNA.jar3dB --3dmotifatlas --jar3d --func B")
print("biorseo.py -i myRNA.fa -o myRNA.bgsubypD --3dmotifatlas --bayespairing --func D")
if opt == "-h" or opt == "--help":
print( "Biorseo, Bi-Objective RNA Structure Efficient Optimizer\n"
"Bio-objective integer linear programming framework to predict RNA secondary structures by including known RNA modules.\n"
"Developped by Louis Becquey (louis.becquey@univ-evry.fr), 2019\n\n")
print("Usage:\tYou must provide:\n\t1) a FASTA input file with -i,\n\t2) a module type with --rna3dmotifs or --3dmotifatlas"
"\n\t3) one module placement method in { --patternmatch, --jar3d, --bayespairing }\n\t")
print("Options:")
print("-h [ --help ]\t\tPrint this help message")
print("--version\t\t\tPrint the program version")
print("-i [ --seq ]\t\tFASTA file with the query RNA sequence")
print("-p [ --patternmatch ]\t\tUse regular expressions to place modules in the sequence")
print("-j [ --jar3d ]\t\tUse JAR3D to place modules in the sequence (requires --3dmotifatlas)")
print("-b [ --bayespairing ]\t\tUse BayesPairing to place modules in the sequence")
print("-o [ --output ]\t\tFolder where to output files")
print("-f [ --func ]\t\t(A, B, C or D, default is B)"
"\t\t\t\tObjective function to score module insertions: (A) insert big modules (B) insert light, high-order modules"
"\t\t\t\t(c) insert modules which score well with the sequence (D) insert light, high-order modules which score well with the sequence.")
"\t\t\t\tC and D require cannot be used with --patternmatch.")
print("biorseo.py -i myRNA.fa -o myResultsFolder/ --rna3dmotifs --patternmatch --func B")
print("biorseo.py -i myRNA.fa -o myResultsFolder/ --3dmotifatlas --jar3d --func B")
print("biorseo.py -i myRNA.fa --3dmotifatlas --bayespairing --func D")
sys.exit()
elif opt == "-i":
elif opt == "-i" or opt == "--seq":
self.inputfile = arg
elif opt == "-o":
elif opt == "-o" or opt == "--output":
self.outputf = arg # output file or folder...
if self.outputf[1] != '/':
self.outputf = getcwd() + '/' + self.outputf
if self.outputf[-1] != '/':
self.outputf = self.outputf + '/'
elif opt == "--func":
elif opt == "-f" or opt == "--func":
if arg in ['A', 'B', 'C', 'D']:
self.func = arg
else:
raise "Unknown scoring function " + arg
elif opt == "--patternmatch":
elif opt == "-p" or opt == "--patternmatch":
self.type = "dpm"
elif opt == "--jar3d":
elif opt == "-j" or opt == "--jar3d":
self.type = "jar3d"
elif opt == "--bayespairing":
elif opt == "-b" or opt == "--bayespairing":
self.type = "byp"
elif opt == "--rna3dmotifs":
self.modules = "desc"
elif opt == "--3dmotifatlas":
self.modules = "bgsu"
else:
raise "Unknown option " + opt
elif opt == "--modulespath":
HLmotifDir = arg + "/HL/3.2/lib"
ILmotifDir = arg + "/IL/3.2/lib"
descfolder = arg
elif opt == "--version":
subprocess.call([biorseoDir+"/bin/biorseo", "--version"])
exit(0)
elif opt == "-l" or opt == "--interrupt-limit":
self.forward_options.append("-l")
self.forward_options.append(arg)
elif opt == "-v" or opt == "--verbose":
self.forward_options.append("-v")
elif opt == "-n" or opt == "--disable-pseudoknots":
self.forward_options.append("-n")
elif opt == "-t" or opt == "--theta":
self.forward_options.append("-t")
self.forward_options.append(arg)
elif opt == "-c" or opt == "--first-objective":
self.forward_options.append("-c")
self.forward_options.append(arg)
print("saving files to", self.outputf)
# create jobs
......@@ -793,7 +829,8 @@ class BiorseoInstance:
command = [executable, "-s", fastafile ]
if method_type:
command += [ method_type, csv ]
command += [ "-o", self.outputf + instance.header + ext + self.func, "--type", self.func ]
command += [ "-o", self.outputf + instance.header + ext + self.func, "--function", self.func ]
command += self.forward_options
self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3))
......
......@@ -21,6 +21,7 @@ char MOIP::obj_function_nbr_ = 'A';
uint MOIP::obj_to_solve_ = 1;
double MOIP::precision_ = 1e-5;
bool MOIP::allow_pk_ = true;
uint MOIP::max_sol_nbr_ = 500;
unsigned getNumConstraints(IloModel& m)
{
......@@ -499,8 +500,8 @@ void MOIP::add_solution(const SecondaryStructure& s)
{
if (verbose_) cout << "\t>adding structure to Pareto set :\t" << s.to_string() << endl;
pareto_.push_back(s);
if (pareto_.size() > 500) {
cerr << "\033[31m Quitting because combinatorial issues (>500 solutions in Pareto set). \033[0m" << endl;
if (pareto_.size() > max_sol_nbr_) {
cerr << "\033[31m Quitting because combinatorial issues (>" << max_sol_nbr_ << " solutions in Pareto set). \033[0m" << endl;
exit(1);
}
}
......
......@@ -30,7 +30,8 @@ class MOIP
static uint obj_to_solve_; // What objective do you prefer to solve in mono-objective portions of the algorithm ?
static double precision_; // decimals to keep in objective values, to avoid numerical issues. otherwise, solution with objective 5.0000000009 dominates solution with 5.0 =(
static bool allow_pk_; // Wether we forbid pseudoknots (false) or allow them (true)
static uint max_sol_nbr_; // Number of solutions to accept in the Pareto set before we give up the computation
private:
bool is_undominated_yet(const SecondaryStructure& s);
void define_problem_constraints(void);
......
......@@ -73,14 +73,15 @@ int main(int argc, char* argv[])
("version", "Print the program version")
("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence")
("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program")
("jar3dcsv", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("bayespaircsv", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("jar3dcsv,j", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("bayespaircsv,b", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by test_on_RNAstrand.py")
("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(1), "Objective to solve in the mono-objective portions of the algorithm")
("output,o", po::value<string>(&outputName), "A file to summarize the computation results")
("theta,t", po::value<float>(&theta_p_threshold)->default_value(0.001), "Pairing probability threshold to consider or not the possibility of pairing")
("type,f", po::value<char>(&obj_function_nbr)->default_value('A'), "What objective function to use to include motifs: square of motif size in nucleotides like "
"RNA-MoIP (A), motif size + number of components (B), site score (C), motif size + site score + number of components (D)")
("function,f", po::value<char>(&obj_function_nbr)->default_value('B'), "What objective function to use to include motifs: square of motif size in nucleotides like "
"RNA-MoIP (A), light motif size + high number of components (B), site score (C), light motif size + site score + high number of components (D)")
("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots")
("limit,l", po::value<unsigned int>(&MOIP::max_sol_nbr_)->default_value(500), "Intermediate number of solutions in the Pareto set above which we give up the calculation.")
("verbose,v", "Print what is happening to stdout");
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
......@@ -99,7 +100,7 @@ int main(int argc, char* argv[])
return EXIT_SUCCESS;
}
if (vm.count("version")) {
cout << "Biorseo v1.0, May 2019" << endl;
cout << "Biorseo v1.01, June 2019" << endl;
return EXIT_SUCCESS;
}
if (vm.count("verbose")) verbose = true;
......@@ -112,7 +113,7 @@ int main(int argc, char* argv[])
return EXIT_FAILURE;
}
if (vm.count("-d") and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) {
cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --type C or --type D.\033[0m See "
cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --function C or --function D.\033[0m See "
"--help for more "
"information."
<< endl;
......