Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
biorseo
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2019-06-04 10:31:18 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
78f08463d5323cd30ef50e4d70cbcb344a922312
78f08463
1 parent
e748c210
Creating simple python launcher
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
162 additions
and
13 deletions
biorseo.py
biorseo.py
View file @
78f0846
...
...
@@ -53,6 +53,43 @@ class MyPool(multiprocessing.pool.Pool):
super
(
MyPool
,
self
)
.
__init__
(
*
args
,
**
kwargs
)
class Loop:
    """Plain record describing one loop of a predicted secondary structure.

    The constructor stores its four arguments unchanged:

    * ``header``   -- identifier of the loop.
      NOTE(review): elsewhere in the file ``header[1:]`` is used to build a
      path, so the first character is presumably a marker such as '>' --
      confirm against the caller.
    * ``seq``      -- the ``subsequence`` argument (the loop's sequence).
    * ``type``     -- the ``looptype`` argument.
    * ``position`` -- the ``position`` argument; presumably the coordinates
      of the loop's components in the query sequence -- confirm.
    """

    def __init__(self, header, subsequence, looptype, position):
        self.header = header
        self.seq = subsequence
        self.type = looptype
        self.position = position

    def get_header(self):
        """Return the loop's header (same value as ``self.header``)."""
        return self.header

    def subsequence(self):
        """Return the loop's sequence (same value as ``self.seq``)."""
        return self.seq

    def __repr__(self):
        # Readable representation so loops show up usefully in logs and
        # in tracebacks coming back from worker processes.
        return "Loop(header={!r}, seq={!r}, type={!r}, position={!r})".format(
            self.header, self.seq, self.type, self.position
        )
class InsertionSite:
    """One motif hit parsed from a line of JAR3D CSV output.

    Instances order by ``score`` only (``<`` and ``>``), which is what
    ``list.sort()`` needs to rank sites. Equality is left as object
    identity.
    """

    def __init__(self, loop, csv_line):
        """Parse ``csv_line`` and attach the result to ``loop``.

        ``loop`` must expose a ``position`` attribute. ``csv_line`` is split
        on commas; fields 2, 4 and the second-to-last one are read.

        Raises ``IndexError`` if the line has too few fields and
        ``ValueError`` if the score or rotation field is not numeric.
        """
        # BEWARE: JAR3D's CSV output depends on Java's locale settings.
        # On French systems it uses commas both as field delimiters AND as
        # the decimal separator, so fields can shift. Parse with caution and
        # check what the CSV output files look like on your system.
        info = csv_line.split(',')

        # The Loop object that has been searched with JAR3D.
        self.loop = loop

        # Position of the loop's components (hence of the motif's ones) in
        # the query sequence.
        self.position = loop.position

        # Motif model identifier of the RNA 3D Motif Atlas.
        self.atlas_id = info[2]

        # Alignment score of the subsequence to the motif model. The value
        # is parsed as a float and then truncated toward zero by int().
        self.score = int(float(info[4]))

        # Should the motif model be inverted to fit the sequence?
        # Indexed from the end of the line rather than from the start.
        self.rotation = int(info[-2])

    def __lt__(self, other):
        return self.score < other.score

    def __gt__(self, other):
        return self.score > other.score

    def __repr__(self):
        # Readable representation for logs and debugging of sorted results.
        return (
            "InsertionSite(atlas_id={!r}, score={!r}, rotation={!r}, "
            "position={!r})"
        ).format(self.atlas_id, self.score, self.rotation, self.position)
class
Job
:
def
__init__
(
self
,
command
=
[],
function
=
None
,
args
=
[],
how_many_in_parallel
=
0
,
priority
=
1
,
timeout
=
None
,
checkFunc
=
None
,
checkArgs
=
[]):
self
.
cmd_
=
command
...
...
@@ -72,10 +109,14 @@ class Job:
class
BiorseoInstance
:
def
__init__
(
self
,
argv
):
# set default options
self
.
type
=
"dpm"
self
.
modules
=
"desc"
self
.
func
=
'B'
self
.
outputf
=
outputDir
self
.
jobcount
=
0
# Parse options
try
:
opts
,
args
=
getopt
.
getopt
(
argv
,
"hil::o:"
,
[
"type="
,
"func="
,
"modules="
])
...
...
@@ -124,6 +165,114 @@ class BiorseoInstance:
# Create the output folder
subprocess
.
call
([
"mkdir"
,
"-p"
,
self
.
outputf
])
def enumerate_loops(self, s):
    """List the loops of a dot-bracket secondary structure.

    ``s`` is scanned once, left to right. Only '(', ')' and '.' trigger
    any action; every character still becomes ``previous`` for the next
    step.

    Returns a list of loops. Each loop is a list of ``(start, end)`` index
    tuples into ``s``, one tuple per component (strand) of the loop. A loop
    with a single component is a hairpin loop. An empty ``s`` yields an
    empty list.
    """

    def resort(unclosedLoops):
        # Move the loop that was just appended in front of the loops that
        # are still waiting for their closing component(s).
        loops.insert(len(loops) - 1 - unclosedLoops, loops[-1])
        loops.pop(-1)

    # Nothing to scan; also protects the s[0] lookup below.
    if not s:
        return []

    opened = []               # indices of '(' not yet matched
    openingStart = []         # index of a '(' directly followed by something else
    closingStart = []         # index of a ')' directly followed by something else
    loops = []
    loopsUnclosed = 0         # loops started but not yet completed
    consecutiveOpenings = []  # lengths of the runs of '(' still to be closed
    # NOTE(review): when s starts with '(' this seeds a run that the first
    # loop iteration appends again; kept as-is -- confirm whether the extra
    # entry is an intended sentinel.
    if s[0] == '(':
        consecutiveOpenings.append(1)
    consecutiveClosings = 0   # length of the current run of ')'

    lastclosed = -1           # index of the most recently processed ')'
    previous = ''
    for i, char in enumerate(s):

        # If we arrive on an unpaired segment
        if char == '.':
            if previous == '(':
                openingStart.append(i - 1)
            if previous == ')':
                closingStart.append(i - 1)

        # Opening basepair
        if char == '(':
            if previous == '(':
                consecutiveOpenings[-1] += 1
            else:
                consecutiveOpenings.append(1)
            if previous == ')':
                closingStart.append(i - 1)

            # We have something like (...(
            if openingStart and openingStart[-1] == opened[-1]:
                # Create a new loop starting with this component.
                loops.append([(openingStart[-1], i)])
                openingStart.pop(-1)
                loopsUnclosed += 1
            # We have something like )...( or even )(
            if closingStart and closingStart[-1] == lastclosed:
                # Append a component to existing multiloop
                loops[-1].append((closingStart[-1], i))
                closingStart.pop(-1)

            opened.append(i)

        # Closing basepair
        if char == ')':
            if previous == ')':
                consecutiveClosings += 1
            else:
                consecutiveClosings = 1
            # This is not supposed to happen in real data, but whatever.
            if previous == '(':
                openingStart.append(i - 1)

            # We have something like (...) or ()
            if openingStart and openingStart[-1] == opened[-1]:
                # Create a new loop, and save it as already closed (HL)
                loops.append([(openingStart[-1], i)])
                openingStart.pop(-1)
                resort(loopsUnclosed)
            # We have something like )...)
            if closingStart and closingStart[-1] == lastclosed:
                # Append a component to existing multiloop and close it.
                loops[-1].append((closingStart[-1], i))
                closingStart.pop(-1)
                loopsUnclosed -= 1
                resort(loopsUnclosed)

            if i + 1 < len(s):
                if s[i + 1] != ')':
                    # We are on something like: ).
                    # An openingStart has not been correctly detected,
                    # like in ...((((((...)))...)))
                    if consecutiveClosings < consecutiveOpenings[-1]:
                        # Create a new loop (uncompleted)
                        loops.append([(opened[-2], opened[-1])])
                        loopsUnclosed += 1

                    # We just completed an HL+stem, like ...(((...))).. ,
                    # we can forget its info
                    if consecutiveClosings == consecutiveOpenings[-1]:
                        consecutiveClosings = 0
                        consecutiveOpenings.pop(-1)
                    else:
                        # There are still several basepairs to remember:
                        # forget only the processed ones, keep the others
                        consecutiveOpenings[-1] -= consecutiveClosings
                        consecutiveClosings = 0
                else:
                    # We are on something like: ))
                    # We are on a closingStart that cannot be correctly
                    # detected, like in ...(((...(((...))))))
                    if consecutiveClosings == consecutiveOpenings[-1]:
                        # Append a component to the uncomplete loop and
                        # close it.
                        loops[-1].append((i, i + 1))
                        loopsUnclosed -= 1
                        resort(loopsUnclosed)
                        # Forget the info about the processed stem.
                        consecutiveClosings = 0
                        consecutiveOpenings.pop(-1)

            opened.pop(-1)
            lastclosed = i

        previous = char

    return loops
def
launch_JAR3D_worker
(
self
,
loop
):
# write motif to a file
newpath
=
getcwd
()
+
'/'
+
loop
.
header
[
1
:]
...
...
@@ -182,7 +331,7 @@ class BiorseoInstance:
HLs
=
[]
ILs
=
[]
for
ss
in
rnasubopt_preds
:
loop_candidates
=
enumerate_loops
(
ss
)
loop_candidates
=
self
.
enumerate_loops
(
ss
)
for
loop_candidate
in
loop_candidates
:
if
len
(
loop_candidate
)
==
1
and
loop_candidate
not
in
HLs
:
HLs
.
append
(
loop_candidate
)
...
...
@@ -199,7 +348,7 @@ class BiorseoInstance:
# Scanning loop subsequences against motif database
pool
=
MyPool
(
processes
=
cpu_count
())
insertion_sites
=
[
x
for
y
in
pool
.
map
(
launch_JAR3D_worker
,
loops
)
for
x
in
y
]
self
.
launch_JAR3D_worker
,
loops
)
for
x
in
y
]
insertion_sites
.
sort
(
reverse
=
True
)
# Writing results to CSV file
c
=
0
...
...
@@ -265,21 +414,21 @@ class BiorseoInstance:
def
execute_job
(
self
,
j
):
if
j
.
checkFunc_
is
not
None
:
if
j
.
checkFunc_
(
*
j
.
checkArgs_
):
running_stats
[
2
]
+=
1
print
(
"["
+
str
(
running_stats
[
0
]
+
running_stats
[
2
])
+
'/'
+
str
(
jobcount
)
+
"]
\t
Skipping a finished job"
)
self
.
running_stats
[
2
]
+=
1
print
(
"["
+
str
(
self
.
running_stats
[
0
]
+
self
.
running_stats
[
2
])
+
'/'
+
str
(
self
.
jobcount
)
+
"]
\t
Skipping a finished job"
)
return
0
running_stats
[
0
]
+=
1
self
.
running_stats
[
0
]
+=
1
if
len
(
j
.
cmd_
):
logfile
=
open
(
"log_of_the_run.sh"
,
'a'
)
logfile
.
write
(
" "
.
join
(
j
.
cmd_
))
logfile
.
write
(
"
\n
"
)
logfile
.
close
()
print
(
"["
+
str
(
running_stats
[
0
]
+
running_stats
[
2
])
+
'/'
+
str
(
jobcount
)
+
"]
\t
"
+
" "
.
join
(
j
.
cmd_
))
print
(
"["
+
str
(
self
.
running_stats
[
0
]
+
self
.
running_stats
[
2
])
+
'/'
+
str
(
self
.
jobcount
)
+
"]
\t
"
+
" "
.
join
(
j
.
cmd_
))
r
=
subprocess
.
call
(
j
.
cmd_
,
timeout
=
j
.
timeout_
)
elif
j
.
func_
is
not
None
:
print
(
"["
+
str
(
running_stats
[
0
]
+
running_stats
[
2
])
+
'/'
+
str
(
jobcount
)
+
print
(
"["
+
str
(
self
.
running_stats
[
0
]
+
self
.
running_stats
[
2
])
+
'/'
+
str
(
self
.
jobcount
)
+
"]
\t
"
+
j
.
func_
.
__name__
+
'('
+
", "
.
join
([
a
for
a
in
j
.
args_
])
+
')'
)
try
:
r
=
j
.
func_
(
*
j
.
args_
)
...
...
@@ -287,11 +436,11 @@ class BiorseoInstance:
r
=
1
pass
if
r
:
fails
.
append
(
j
)
running_stats
[
1
]
+=
1
self
.
fails
.
append
(
j
)
self
.
running_stats
[
1
]
+=
1
return
r
def
check_existence
(
self
,
datatype
,
method
,
function
,
with_PK
,
basename
):
def
check_
result_
existence
(
self
,
datatype
,
method
,
function
,
with_PK
,
basename
):
folder
=
self
.
outputf
+
"PK/"
if
with_PK
else
self
.
outputf
+
"noPK/"
if
datatype
==
"bgsu"
:
if
method
==
"jar3d"
:
...
...
@@ -311,7 +460,7 @@ class BiorseoInstance:
raise
"Unknown data type !"
return
path
.
isfile
(
folder
+
basename
+
extension
+
function
)
def
check_existence
(
self
,
datatype
,
method
,
basename
):
def
check_
csv_
existence
(
self
,
datatype
,
method
,
basename
):
if
datatype
==
"bgsu"
:
if
method
==
"jar3d"
:
extension
=
".sites.csv"
...
...
Please
register
or
login
to post a comment