Louis BECQUEY

-O option to keep temp files

...@@ -43,7 +43,11 @@ OBJECTIVE FUNCTIONS FOR THE MODULE INSERTION CRITERIA ...@@ -43,7 +43,11 @@ OBJECTIVE FUNCTIONS FOR THE MODULE INSERTION CRITERIA
43 * **Function C** : weights a module by its insertion site score (JAR3D or BayesPairing score). 43 * **Function C** : weights a module by its insertion site score (JAR3D or BayesPairing score).
44 * **Function D** : weights a module by its number of components (strands) and insertion site score (JAR3D or BayesPairing score), and penalizes it by the log_2 of its nucleotide size. 44 * **Function D** : weights a module by its number of components (strands) and insertion site score (JAR3D or BayesPairing score), and penalizes it by the log_2 of its nucleotide size.
45 45
46 -3/ Recommended uses 46 +3/ Installation
47 +==================================
48 +Check the file INSTALL.md for installation instructions.
49 +
50 +4/ Recommended uses
47 ================================== 51 ==================================
48 - If **you know you have no pseudoknot**: 52 - If **you know you have no pseudoknot**:
49 * Benchmarks show Biorseo does not perform better than simpler tools like RNAsubopt alone. Please use RNAsubopt (ViennaRNA package) or Fold (RNAstructure package). 53 * Benchmarks show Biorseo does not perform better than simpler tools like RNAsubopt alone. Please use RNAsubopt (ViennaRNA package) or Fold (RNAstructure package).
...@@ -55,11 +59,6 @@ OBJECTIVE FUNCTIONS FOR THE MODULE INSERTION CRITERIA ...@@ -55,11 +59,6 @@ OBJECTIVE FUNCTIONS FOR THE MODULE INSERTION CRITERIA
55 * The use of the RNA 3D Motif Atlas placed by JAR3D and scored with function B is not subject to combinatorial issues, but performs a bit worse. It also returns fewer solutions. Example: 59 * The use of the RNA 3D Motif Atlas placed by JAR3D and scored with function B is not subject to combinatorial issues, but performs a bit worse. It also returns fewer solutions. Example:
56 `./bin/biorseo -i PDB_00304.fa --3dmotifatlas --jar3d --func B` 60 `./bin/biorseo -i PDB_00304.fa --3dmotifatlas --jar3d --func B`
57 61
58 -
59 -4/ Installation
60 -==================================
61 -Check the file INSTALL.md for installation instructions.
62 -
63 5/ List of Options 62 5/ List of Options
64 ================================== 63 ==================================
65 ``` 64 ```
...@@ -69,7 +68,8 @@ Check the file INSTALL.md for installation instructions. ...@@ -69,7 +68,8 @@ Check the file INSTALL.md for installation instructions.
69 -p [ --patternmatch ] Use regular expressions to place modules in the sequence 68 -p [ --patternmatch ] Use regular expressions to place modules in the sequence
70 -j [ --jar3d ] Use JAR3D to place modules in the sequence (requires --3dmotifatlas) 69 -j [ --jar3d ] Use JAR3D to place modules in the sequence (requires --3dmotifatlas)
71 -b [ --bayespairing ] Use BayesPairing to place modules in the sequence 70 -b [ --bayespairing ] Use BayesPairing to place modules in the sequence
72 --o [ --output=… ] Folder where to output files 71 +-o [ --output=… ] File to summarize the results
72 +-O [ --outputf=… ] Folder where to output result and temp files
73 -f [ --func=… ] (A, B, C or D, default is B) Objective function to score module insertions: 73 -f [ --func=… ] (A, B, C or D, default is B) Objective function to score module insertions:
74 (A) insert big modules (B) insert light, high-order modules 74 (A) insert big modules (B) insert light, high-order modules
75 (c) insert modules which score well with the sequence 75 (c) insert modules which score well with the sequence
......
...@@ -4,13 +4,14 @@ import sys ...@@ -4,13 +4,14 @@ import sys
4 import getopt 4 import getopt
5 from scipy import stats 5 from scipy import stats
6 import subprocess 6 import subprocess
7 -from os import path, makedirs, getcwd, chdir, devnull 7 +from os import path, makedirs, getcwd, chdir, devnull, remove, walk
8 import matplotlib.pyplot as plt 8 import matplotlib.pyplot as plt
9 from matplotlib import colors 9 from matplotlib import colors
10 from math import sqrt 10 from math import sqrt
11 from multiprocessing import cpu_count, Manager 11 from multiprocessing import cpu_count, Manager
12 import multiprocessing 12 import multiprocessing
13 import ast 13 import ast
14 +from shutil import move
14 15
15 16
16 # ================== DEFINITION OF THE PATHS ============================== 17 # ================== DEFINITION OF THE PATHS ==============================
...@@ -29,7 +30,9 @@ tempDir = "temp/" ...@@ -29,7 +30,9 @@ tempDir = "temp/"
29 30
30 # Parse options 31 # Parse options
31 try: 32 try:
32 - opts, args = getopt.getopt(sys.argv[1:], "bc:f:hi:jl:no:pt:v", ["verbose", "rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func=","help","version","seq=","modules-path=", "first-objective=","output=","theta=","interrupt-limit="]) 33 + opts, args = getopt.getopt(sys.argv[1:], "bc:f:hi:jl:no:O:pt:v", [ "verbose","rna3dmotifs","3dmotifatlas","jar3d","bayespairing","patternmatch","func=",
34 + "help","version","seq=","modules-path=", "first-objective=","output=","theta=",
35 + "interrupt-limit=", "outputf="])
33 except getopt.GetoptError as err: 36 except getopt.GetoptError as err:
34 print(err) 37 print(err)
35 sys.exit(2) 38 sys.exit(2)
...@@ -152,7 +155,9 @@ class BiorseoInstance: ...@@ -152,7 +155,9 @@ class BiorseoInstance:
152 self.modules = "desc" 155 self.modules = "desc"
153 self.func = 'B' 156 self.func = 'B'
154 self.inputfile = "" 157 self.inputfile = ""
155 - self.outputf = biorseoDir + "/results/" # default results location 158 + self.finalname = ""
159 + self.outputf = ""
160 + self.output = ""
156 self.jobcount = 0 161 self.jobcount = 0
157 self.joblist = [] 162 self.joblist = []
158 self.mode = 0 # default is single sequence mode 163 self.mode = 0 # default is single sequence mode
...@@ -173,7 +178,8 @@ class BiorseoInstance: ...@@ -173,7 +178,8 @@ class BiorseoInstance:
173 print("-p [ --patternmatch ]\t\tUse regular expressions to place modules in the sequence") 178 print("-p [ --patternmatch ]\t\tUse regular expressions to place modules in the sequence")
174 print("-j [ --jar3d ]\t\t\tUse JAR3D to place modules in the sequence (requires --3dmotifatlas)") 179 print("-j [ --jar3d ]\t\t\tUse JAR3D to place modules in the sequence (requires --3dmotifatlas)")
175 print("-b [ --bayespairing ]\t\tUse BayesPairing to place modules in the sequence") 180 print("-b [ --bayespairing ]\t\tUse BayesPairing to place modules in the sequence")
176 - print("-o [ --output=… ]\t\tFolder where to output files") 181 + print("-o [ --output=… ]\t\tFile to summarize the results")
182 + print("-O [ --outputf=… ]\t\tFolder where to output result and temp files")
177 print("-f [ --func=… ]\t\t\t(A, B, C or D, default is B)" 183 print("-f [ --func=… ]\t\t\t(A, B, C or D, default is B)"
178 " Objective function to score module insertions:\n\t\t\t\t (A) insert big modules (B) insert light, high-order modules" 184 " Objective function to score module insertions:\n\t\t\t\t (A) insert big modules (B) insert light, high-order modules"
179 "\n\t\t\t\t (c) insert modules which score well with the sequence\n\t\t\t\t (D) insert light, high-order modules which score well with the sequence." 185 "\n\t\t\t\t (c) insert modules which score well with the sequence\n\t\t\t\t (D) insert light, high-order modules which score well with the sequence."
...@@ -195,12 +201,16 @@ class BiorseoInstance: ...@@ -195,12 +201,16 @@ class BiorseoInstance:
195 sys.exit() 201 sys.exit()
196 elif opt == "-i" or opt == "--seq": 202 elif opt == "-i" or opt == "--seq":
197 self.inputfile = arg 203 self.inputfile = arg
198 - elif opt == "-o" or opt == "--output": 204 + elif opt == "-O" or opt == "--outputf":
199 - self.outputf = arg # output file or folder... 205 + self.outputf = arg # output folder
200 if self.outputf[1] != '/': 206 if self.outputf[1] != '/':
201 self.outputf = getcwd() + '/' + self.outputf 207 self.outputf = getcwd() + '/' + self.outputf
202 if self.outputf[-1] != '/': 208 if self.outputf[-1] != '/':
203 self.outputf = self.outputf + '/' 209 self.outputf = self.outputf + '/'
210 + elif opt == "-o" or opt == "--output":
211 + self.output = arg # output file
212 + if self.output[1] != '/':
213 + self.output = getcwd() + '/' + self.output
204 elif opt == "-f" or opt == "--func": 214 elif opt == "-f" or opt == "--func":
205 if arg in ['A', 'B', 'C', 'D']: 215 if arg in ['A', 'B', 'C', 'D']:
206 self.func = arg 216 self.func = arg
...@@ -237,14 +247,48 @@ class BiorseoInstance: ...@@ -237,14 +247,48 @@ class BiorseoInstance:
237 self.forward_options.append("-c") 247 self.forward_options.append("-c")
238 self.forward_options.append(arg) 248 self.forward_options.append(arg)
239 249
240 - print("saving files to", self.outputf) 250 + if self.outputf != "":
251 + print("saving files to", self.outputf)
252 +
241 # create jobs 253 # create jobs
242 self.list_jobs() 254 self.list_jobs()
243 255
244 # run them 256 # run them
245 self.execute_jobs() 257 self.execute_jobs()
246 258
247 - # subprocess.call(["rm", "-rf", tempDir]) # empty the temp folder 259 + # locate the results at the right place
260 + if self.output != "" and self.outputf != "":
261 + for src_dir, dirs, files in walk(tempDir):
262 + dst_dir = src_dir.replace(tempDir, self.outputf, 1)
263 + if not path.exists(dst_dir):
264 + makedirs(dst_dir)
265 + for file_ in files:
266 + src_file = path.join(src_dir, file_)
267 + dst_file = path.join(dst_dir, file_)
268 + if path.exists(dst_file):
269 + # skip the file when src and dst are the same file
270 + if path.samefile(src_file, dst_file):
271 + continue
272 + remove(dst_file)
273 + move(src_file, dst_dir)
274 + subprocess.call(["mv", self.outputf+self.finalname.split('/')[-1], self.output])
275 + elif self.output != "":
276 + subprocess.call(["mv", self.finalname, self.output])
277 + elif self.outputf != "":
278 + for src_dir, dirs, files in walk(tempDir):
279 + dst_dir = src_dir.replace(tempDir, self.outputf, 1)
280 + if not path.exists(dst_dir):
281 + makedirs(dst_dir)
282 + for file_ in files:
283 + src_file = path.join(src_dir, file_)
284 + dst_file = path.join(dst_dir, file_)
285 + if path.exists(dst_file):
286 + # skip the file when src and dst are the same file
287 + if path.samefile(src_file, dst_file):
288 + continue
289 + remove(dst_file)
290 + move(src_file, dst_dir)
291 + subprocess.call(["rm", "-rf", tempDir]) # remove the temp folder
248 292
249 def enumerate_loops(self, s): 293 def enumerate_loops(self, s):
250 def resort(unclosedLoops): 294 def resort(unclosedLoops):
...@@ -539,7 +583,8 @@ class BiorseoInstance: ...@@ -539,7 +583,8 @@ class BiorseoInstance:
539 583
540 # Read fasta file, which can contain one or several RNAs 584 # Read fasta file, which can contain one or several RNAs
541 RNAcontainer = [] 585 RNAcontainer = []
542 - subprocess.call(["mkdir", "-p", self.outputf]) # Create the output folder 586 + if self.outputf != "":
587 + subprocess.call(["mkdir", "-p", self.outputf]) # Create the output folder
543 subprocess.call(["mkdir", "-p", tempDir]) # Create the temp folder 588 subprocess.call(["mkdir", "-p", tempDir]) # Create the temp folder
544 print("loading file %s..." % self.inputfile) 589 print("loading file %s..." % self.inputfile)
545 db = open(self.inputfile, "r") 590 db = open(self.inputfile, "r")
...@@ -610,7 +655,8 @@ class BiorseoInstance: ...@@ -610,7 +655,8 @@ class BiorseoInstance:
610 command = [executable, "-s", fastafile ] 655 command = [executable, "-s", fastafile ]
611 if method_type: 656 if method_type:
612 command += [ method_type, csv ] 657 command += [ method_type, csv ]
613 - command += [ "-o", self.outputf + instance.header + ext + self.func, "--function", self.func ] 658 + self.finalname = tempDir + instance.header + ext + self.func
659 + command += [ "-o", self.finalname, "--function", self.func ]
614 command += self.forward_options 660 command += self.forward_options
615 self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3)) 661 self.joblist.append(Job(command=command, priority=priority, timeout=3600, how_many_in_parallel=3))
616 662
......