Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
RNANet
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2021-01-30 12:14:18 +0100
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
dc3fd966c62aeb2a1039f42c1fa16a6e59a4d793
dc3fd966
1 parent
f6cb9697
MutableSeqs overload of ALignIO
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
51 additions
and
21 deletions
RNAnet.py
RNAnet.py
View file @
dc3fd96
...
...
@@ -9,7 +9,6 @@ if a[0] != "3.8":
print
(
"Please use version 3.8 or newer."
)
exit
(
1
)
import
Bio
import
Bio.PDB
as
pdb
import
concurrent.futures
import
getopt
...
...
@@ -37,7 +36,10 @@ from time import sleep
from
tqdm
import
tqdm
from
setproctitle
import
setproctitle
from
Bio
import
AlignIO
,
SeqIO
from
Bio.Align
import
AlignInfo
from
Bio.SeqIO.FastaIO
import
FastaIterator
,
SimpleFastaParser
from
Bio.Seq
import
MutableSeq
from
Bio.SeqRecord
import
SeqRecord
from
Bio.Align
import
MultipleSeqAlignment
def
trace_unhandled_exceptions
(
func
):
@wraps
(
func
)
...
...
@@ -81,6 +83,30 @@ no_nts_set = set()
weird_mappings
=
set
()
class
MutableFastaIterator
(
FastaIterator
):
"""
Same as Biopython's FastaIterator, but uses Bio.Seq.MutableSeq objects instead of Bio.Seq.Seq.
"""
def
iterate
(
self
,
handle
):
"""Parse the file and generate SeqRecord objects."""
title2ids
=
self
.
title2ids
if
title2ids
:
for
title
,
sequence
in
SimpleFastaParser
(
handle
):
id
,
name
,
descr
=
title2ids
(
title
)
yield
SeqRecord
(
MutableSeq
(
sequence
),
id
=
id
,
name
=
name
,
description
=
descr
)
else
:
for
title
,
sequence
in
SimpleFastaParser
(
handle
):
try
:
first_word
=
title
.
split
(
None
,
1
)[
0
]
except
IndexError
:
assert
not
title
,
repr
(
title
)
# Should we use SeqRecord default for no ID?
first_word
=
""
yield
SeqRecord
(
MutableSeq
(
sequence
),
id
=
first_word
,
name
=
first_word
,
description
=
title
,
)
class
SelectivePortionSelector
(
object
):
"""Class passed to MMCIFIO to select some chain portions in an MMCIF file.
...
...
@@ -1532,13 +1558,11 @@ def read_cpu_number():
p
=
subprocess
.
run
([
'grep'
,
'-Ec'
,
'(Intel|AMD)'
,
'/proc/cpuinfo'
],
stdout
=
subprocess
.
PIPE
)
return
int
(
int
(
p
.
stdout
.
decode
(
'utf-8'
)[:
-
1
])
/
2
)
def
init_worker
(
tqdm_lock
=
None
):
signal
.
signal
(
signal
.
SIGINT
,
signal
.
SIG_IGN
)
if
tqdm_lock
is
not
None
:
tqdm
.
set_lock
(
tqdm_lock
)
def
warn
(
message
,
error
=
False
):
"""Pretty-print warnings and error messages.
"""
...
...
@@ -1557,12 +1581,32 @@ def warn(message, error=False):
else
:
print
(
f
"
\t
>
\033
[33mWARN: {message:64s}
\033
[0m
\t
{warnsymb}"
,
flush
=
True
)
def
notify
(
message
,
post
=
''
):
if
len
(
post
):
post
=
'('
+
post
+
')'
print
(
f
"
\t
> {message:70s}
\t
{validsymb}
\t
{post}"
,
flush
=
True
)
def
_mutable_SeqIO_to_alignment_iterator
(
handle
):
records
=
list
(
MutableFastaIterator
(
handle
))
if
records
:
yield
MultipleSeqAlignment
(
records
)
def
parse
(
handle
):
with
open
(
handle
,
'r'
)
as
fp
:
yield
from
_mutable_SeqIO_to_alignment_iterator
(
fp
)
def
read
(
handle
):
iterator
=
parse
(
handle
)
try
:
alignment
=
next
(
iterator
)
except
StopIteration
:
raise
ValueError
(
"No records found in handle"
)
from
None
try
:
next
(
iterator
)
raise
ValueError
(
"More than one record found in handle"
)
except
StopIteration
:
pass
return
alignment
def
sql_define_tables
(
conn
):
conn
.
executescript
(
...
...
@@ -1673,7 +1717,6 @@ def sql_define_tables(conn):
"""
)
conn
.
commit
()
@trace_unhandled_exceptions
def
sql_ask_database
(
conn
,
sql
,
warn_every
=
10
):
"""
...
...
@@ -1695,7 +1738,6 @@ def sql_ask_database(conn, sql, warn_every=10):
warn
(
"Tried to reach database 100 times and failed. Aborting."
,
error
=
True
)
return
[]
@trace_unhandled_exceptions
def
sql_execute
(
conn
,
sql
,
many
=
False
,
data
=
None
,
warn_every
=
10
):
for
_
in
range
(
100
):
# retry 100 times if it fails
...
...
@@ -1718,7 +1760,6 @@ def sql_execute(conn, sql, many=False, data=None, warn_every=10):
time
.
sleep
(
0.2
)
warn
(
"Tried to reach database 100 times and failed. Aborting."
,
error
=
True
)
@trace_unhandled_exceptions
def
execute_job
(
j
,
jobcount
):
"""Run a Job object.
...
...
@@ -1780,7 +1821,6 @@ def execute_job(j, jobcount):
t
=
end_time
-
start_time
return
(
t
,
m
,
r
)
def
execute_joblist
(
fulljoblist
):
""" Run a list of job objects.
...
...
@@ -1847,7 +1887,6 @@ def execute_joblist(fulljoblist):
# throw back the money
return
results
@trace_unhandled_exceptions
def
work_infer_mappings
(
update_only
,
allmappings
,
fullinference
,
codelist
)
->
list
:
"""Given a list of PDB chains corresponding to an equivalence class from BGSU's NR list,
...
...
@@ -1999,7 +2038,6 @@ def work_infer_mappings(update_only, allmappings, fullinference, codelist) -> li
return
newchains
@trace_unhandled_exceptions
def
work_mmcif
(
pdb_id
):
""" Look for a CIF file (with all chains) from RCSB
...
...
@@ -2078,7 +2116,6 @@ def work_mmcif(pdb_id):
return
0
@trace_unhandled_exceptions
def
work_build_chain
(
c
,
extract
,
khetatm
,
retrying
=
False
,
save_logs
=
True
):
"""Reads information from JSON and save it to database.
...
...
@@ -2115,7 +2152,6 @@ def work_build_chain(c, extract, khetatm, retrying=False, save_logs=True):
return
c
@trace_unhandled_exceptions
def
work_prepare_sequences
(
dl
,
rfam_acc
,
chains
):
"""Prepares FASTA files of homologous sequences to realign with cmalign or SINA.
...
...
@@ -2204,7 +2240,6 @@ def work_prepare_sequences(dl, rfam_acc, chains):
# print some stats
notify
(
status
)
@trace_unhandled_exceptions
def
work_realign
(
rfam_acc
):
""" Runs multiple sequence alignements by RNA family.
...
...
@@ -2315,7 +2350,6 @@ def work_realign(rfam_acc):
with
open
(
runDir
+
"/errors.txt"
,
"a"
)
as
er
:
er
.
write
(
f
"Failed to realign {rfam_acc} (killed)"
)
@trace_unhandled_exceptions
def
work_pssm_remap
(
f
):
"""Computes Position-Specific-Scoring-Matrices given the multiple sequence alignment of the RNA family.
...
...
@@ -2330,10 +2364,6 @@ def work_pssm_remap(f):
global
idxQueue
thr_idx
=
idxQueue
.
get
()
# get the chains of this family in the update
list_of_chains
=
rfam_acc_to_download
[
f
]
chains_ids
=
[
str
(
c
)
for
c
in
list_of_chains
]
##########################################################################################
# Compute frequencies in the alignment
##########################################################################################
...
...
@@ -2342,7 +2372,7 @@ def work_pssm_remap(f):
# Open the alignment
try
:
align
=
AlignIO
.
read
(
path_to_seq_data
+
f
"realigned/{f}++.afa"
,
"fasta"
)
align
=
read
(
path_to_seq_data
+
f
"realigned/{f}++.afa"
)
# This is our custom AlignIO overload which uses MutableSeq instead of Seq
except
:
warn
(
f
"{f}'s alignment is wrong. Recompute it and retry."
,
error
=
True
)
with
open
(
runDir
+
"/errors.txt"
,
"a"
)
as
errf
:
...
...
@@ -2458,6 +2488,7 @@ def work_pssm_remap(f):
if
j
<
ncols
and
s
.
seq
[
j
]
==
'.'
:
re_mappings
.
append
((
db_id
,
i
+
1
,
j
+
1
))
columns_to_save
.
add
(
j
+
1
)
s
.
seq
[
j
]
=
'-'
# We replace the insertion gap by a real gap (thanks to MutableSeqs)
i
+=
1
j
+=
1
continue
...
...
@@ -2543,7 +2574,6 @@ def work_pssm_remap(f):
idxQueue
.
put
(
thr_idx
)
# replace the thread index in the queue
return
0
@trace_unhandled_exceptions
def
work_save
(
c
,
homology
=
True
):
...
...
Please
register
or
login
to post a comment