Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
RNANetLegacy
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2020-04-18 14:15:38 +0000
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
959a521e610563f9a98852adeec9496c535c9d11
959a521e
1 parent
56fd6817
Sqlite connection inside Chain object
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
55 additions
and
38 deletions
RNAnet.py
RNAnet.py
View file @
959a521
...
...
@@ -202,7 +202,7 @@ class Chain:
notify
(
status
)
def
extract_3D_data
(
self
,
conn
):
def
extract_3D_data
(
self
):
""" Maps DSSR annotations to the chain. """
# Load the mmCIF annotations from file
...
...
@@ -407,20 +407,33 @@ class Chain:
df
[
'paired'
]
=
newpairs
# Saving to database
sql_execute
(
conn
,
f
"""
INSERT OR REPLACE INTO nucleotide
(chain_id, index_chain, nt_resnum, nt_name, nt_code, dbn, alpha, beta, gamma, delta, epsilon, zeta,
epsilon_zeta, bb_type, chi, glyco_bond, form, ssZp, Dp, eta, theta, eta_prime, theta_prime, eta_base, theta_base,
v0, v1, v2, v3, v4, amplitude, phase_angle, puckering, nt_align_code, is_A, is_C, is_G, is_U, is_other, nt_position,
paired, pair_type_LW, pair_type_DSSR, nb_interact)
VALUES ({self.db_chain_id}, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
;"""
,
many
=
True
,
data
=
list
(
df
.
to_records
(
index
=
False
))
)
notify
(
f
"Saved {self.chain_label} annotations to database."
)
with
sqlite3
.
connect
(
runDir
+
"/results/RNANet.db"
,
timeout
=
10.0
)
as
conn
:
# Register the chain in table chain
if
self
.
pdb_start
is
not
None
:
sql_execute
(
conn
,
f
""" INSERT OR REPLACE INTO chain
(structure_id, chain_name, pdb_start, pdb_end, reversed, rfam_acc, inferred, issue)
VALUES
(?, ?, ?, ?, ?, ?, ?, ?);"""
,
data
=
(
str
(
self
.
pdb_id
),
str
(
self
.
pdb_chain_id
),
int
(
self
.
pdb_start
),
int
(
self
.
pdb_end
),
int
(
self
.
reversed
),
str
(
self
.
rfam_fam
),
int
(
self
.
inferred
),
int
(
self
.
delete_me
)))
else
:
sql_execute
(
conn
,
"INSERT OR REPLACE INTO chain (structure_id, chain_name, issue) VALUES (?, ?, ?);"
,
data
=
(
str
(
self
.
pdb_id
),
int
(
self
.
pdb_chain_id
),
int
(
self
.
delete_me
)
))
# get the chain id
self
.
db_chain_id
=
sql_ask_database
(
conn
,
f
"SELECT (chain_id) FROM chain WHERE structure_id='{self.pdb_id}' AND chain_name='{self.pdb_chain_id}';"
)[
0
][
0
]
# Add the nucleotides
sql_execute
(
conn
,
f
"""
INSERT OR REPLACE INTO nucleotide
(chain_id, index_chain, nt_resnum, nt_name, nt_code, dbn, alpha, beta, gamma, delta, epsilon, zeta,
epsilon_zeta, bb_type, chi, glyco_bond, form, ssZp, Dp, eta, theta, eta_prime, theta_prime, eta_base, theta_base,
v0, v1, v2, v3, v4, amplitude, phase_angle, puckering, nt_align_code, is_A, is_C, is_G, is_U, is_other, nt_position,
paired, pair_type_LW, pair_type_DSSR, nb_interact)
VALUES ({self.db_chain_id}, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
;"""
,
many
=
True
,
data
=
list
(
df
.
to_records
(
index
=
False
)),
warn_every
=
10
)
# Now load data from the database
self
.
seq
=
""
.
join
(
df
.
nt_code
)
self
.
seq_to_align
=
""
.
join
(
df
.
nt_align_code
)
...
...
@@ -988,7 +1001,8 @@ class Pipeline:
""" Extract the desired chain portions if asked,
and extract their informations from the JSON files to the database.
REQUIRES the previous definition of self.update, so call list_available_mappings() before."""
REQUIRES the previous definition of self.update, so call list_available_mappings() before.
SETS self.loaded_chains"""
# Prepare folders
if
self
.
EXTRACT_CHAINS
:
...
...
@@ -1004,13 +1018,15 @@ class Pipeline:
else
:
clist
=
self
.
update
for
c
in
clist
:
if
retry
:
c
.
delete_me
=
False
# give a second chance
if
(
c
.
chain_label
not
in
self
.
known_issues
)
or
not
self
.
USE_KNOWN_ISSUES
:
joblist
.
append
(
Job
(
function
=
work_build_chain
,
how_many_in_parallel
=
int
(
coeff_ncores
*
ncores
),
args
=
[
c
,
self
.
EXTRACT_CHAINS
,
self
.
KEEP_HETATM
,
retry
]))
try
:
results
=
execute_joblist
(
joblist
)
except
:
print
(
"Exiting"
)
print
(
"Exiting"
,
flush
=
True
)
exit
(
1
)
# If there were newly discovered problems, add this chain to the known issues
...
...
@@ -1023,7 +1039,7 @@ class Pipeline:
warn
(
f
"Adding {c[1].chain_label} to known issues."
)
ki
.
write
(
c
[
1
]
.
chain_label
+
'
\n
'
)
kir
.
write
(
c
[
1
]
.
chain_label
+
'
\n
'
+
c
[
1
]
.
error_messages
+
'
\n\n
'
)
sql_execute
(
conn
,
f
"UPDATE chain SET issue = 1 WHERE chain_id = ?;"
,
data
=
(
c
[
1
]
.
db_chain_id
,))
sql_execute
(
conn
,
f
"UPDATE chain SET issue = 1 WHERE chain_id = ?;"
,
data
=
(
c
[
1
]
.
db_chain_id
,))
ki
.
close
()
kir
.
close
()
...
...
@@ -1215,7 +1231,7 @@ class Pipeline:
r
=
sql_ask_database
(
conn
,
"""SELECT structure_id FROM structure
LEFT JOIN chain ON structure.pdb_id = chain.structure_id
WHERE chain_id IS NULL;"""
)
if
r
!=
(
None
,)
:
if
len
(
r
)
and
r
[
0
][
0
]
is
not
None
:
warn
(
"Structures without referenced chains have been detected"
)
for
x
in
r
:
print
(
x
)
...
...
@@ -1224,7 +1240,7 @@ class Pipeline:
r
=
sql_ask_database
(
conn
,
"""SELECT chain_id, structure_id FROM chain
LEFT JOIN structure ON chain.structure_id = structure.pdb_id
WHERE pdb_id IS NULL;"""
)
if
r
!=
(
None
,)
:
if
len
(
r
)
and
r
[
0
][
0
]
is
not
None
:
warn
(
"Chains without referenced structures have been detected"
)
for
x
in
r
:
print
(
x
)
...
...
@@ -1236,7 +1252,7 @@ class Pipeline:
FROM chain LEFT JOIN re_mapping
ON chain.chain_id = re_mapping.chain_id
WHERE remapping_id IS NULL GROUP BY rfam_acc;"""
)
if
r
!=
(
None
,)
:
if
len
(
r
)
and
r
[
0
][
0
]
is
not
None
:
warn
(
"Structures were not remapped:"
)
for
x
in
r
:
print
(
str
(
x
[
0
])
+
" chains of family "
+
x
[
1
])
...
...
@@ -1248,7 +1264,7 @@ class Pipeline:
LEFT JOIN align_column
ON re_mapping.index_ali=align_column.index_ali AND c.rfam_acc=align_column.rfam_acc
WHERE column_id IS NULL;"""
)
if
r
!=
(
None
,)
:
if
len
(
r
)
and
r
[
0
][
0
]
is
not
None
:
warn
(
"Structures were not remapped:"
)
for
x
in
r
:
print
(
x
)
...
...
@@ -1740,35 +1756,33 @@ def work_mmcif(pdb_id):
@trace_unhandled_exceptions
def
work_build_chain
(
c
,
extract
,
khetatm
,
retrying
=
False
):
""" Additionally adds all the desired information to a Chain object.
"""Reads information from JSON and save it to database.
If asked, also extracts the 3D chains from their original structure files.
"""
if
not
path
.
isfile
(
path_to_3D_data
+
"annotations/"
+
c
.
pdb_id
+
".json"
):
warn
(
f
"Could not find annotations for {c.chain_label}, ignoring it."
,
error
=
True
)
c
.
delete_me
=
True
c
.
error_messages
+=
f
"Could not download and/or find annotations for {c.chain_label}."
# extract the 3D descriptors
if
not
c
.
delete_me
:
c
.
extract_3D_data
()
# Register the chain, and get its chain_id
conn
=
sqlite3
.
connect
(
runDir
+
"/results/RNANet.db"
,
timeout
=
10.0
)
if
c
.
pdb_start
is
not
None
:
sql_execute
(
conn
,
f
""" INSERT OR REPLACE INTO chain
(structure_id, chain_name, pdb_start, pdb_end, reversed, rfam_acc, inferred, issue)
VALUES
(?, ?, ?, ?, ?, ?, ?, ?);"""
,
data
=
(
str
(
c
.
pdb_id
),
str
(
c
.
pdb_chain_id
),
int
(
c
.
pdb_start
),
int
(
c
.
pdb_end
),
int
(
c
.
reversed
),
str
(
c
.
rfam_fam
),
int
(
c
.
inferred
),
int
(
c
.
delete_me
)))
# Small check
with
sqlite3
.
connect
(
runDir
+
"/results/RNANet.db"
,
timeout
=
10.0
)
as
conn
:
nnts
=
sql_ask_database
(
conn
,
f
"SELECT COUNT(nt_id) FROM nucleotide WHERE chain_id={c.db_chain_id};"
,
warn_every
=
10
)[
0
][
0
]
if
not
(
nnts
):
warn
(
f
"Nucleotides not inserted: {c.error_messages}"
)
c
.
delete_me
=
True
c
.
error_messages
=
"Nucleotides not inserted !"
else
:
sql_execute
(
conn
,
"INSERT OR REPLACE INTO chain (structure_id, chain_name, issue) VALUES (?, ?, ?);"
,
data
=
(
str
(
c
.
pdb_id
),
int
(
c
.
pdb_chain_id
),
int
(
c
.
delete_me
)))
c
.
db_chain_id
=
sql_ask_database
(
conn
,
f
"SELECT (chain_id) FROM chain WHERE structure_id='{c.pdb_id}' AND chain_name='{c.pdb_chain_id}';"
)[
0
][
0
]
notify
(
f
"Inserted {nnts} nucleotides to chain {c.chain_label}"
)
# extract the portion we want
if
extract
and
not
c
.
delete_me
:
c
.
extract
(
khetatm
)
# extract the 3D descriptors
if
not
c
.
delete_me
:
c
.
extract_3D_data
(
conn
)
conn
.
close
()
return
c
@trace_unhandled_exceptions
...
...
@@ -2080,6 +2094,8 @@ if __name__ == "__main__":
print
(
"> Storing results into"
,
runDir
+
"/results/RNANet.db"
)
# compute an update compared to what is in the table "chain"
#DEBUG: list everything
# pp.REUSE_ALL = True
pp
.
list_available_mappings
()
# ===========================================================================
...
...
@@ -2088,6 +2104,7 @@ if __name__ == "__main__":
# Download and annotate new RNA 3D chains (Chain objects in pp.update)
pp
.
dl_and_annotate
(
coeff_ncores
=
0.75
)
# At this point, the structure table is up to date
pp
.
build_chains
()
...
...
@@ -2114,7 +2131,7 @@ if __name__ == "__main__":
# ===========================================================================
pp
.
checkpoint_load_chains
()
# Get the list of Rfam families found
rfam_acc_to_download
=
{}
for
c
in
pp
.
loaded_chains
:
...
...
Please
register
or
login
to post a comment