recompute_some_chains.py
2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!python3
import subprocess, os, sys
# Put a list of problematic chains here, they will be properly deleted and recomputed
problems = [
    "1k73_1_A",
    "1k73_1_B",
]


def _delete_chain_command(structure, chain, family=None):
    """Build the sqlite3 command line that deletes one row of the chain table.

    Args:
        structure: value matched against the ``structure_id`` column.
        chain: value matched against the ``chain_name`` column.
        family: value matched against ``rfam_acc``; when None, the row whose
            ``rfam_acc`` is null is targeted instead.

    Returns:
        The argument list to hand to ``subprocess.run``.
    """
    if family is None:
        family_clause = "rfam_acc is null"
    else:
        family_clause = f"rfam_acc=\"{family}\""
    # Foreign keys are switched on in the same sqlite3 session; the script
    # relies on them to also remove the associated nucleotides and re_mappings.
    return ["sqlite3", "results/RNANet.db", f"PRAGMA foreign_keys=ON; delete from chain where structure_id=\"{structure}\" and chain_name=\"{chain}\" and {family_clause};"]


def _rnanet_command(label, path_to_3D_data, path_to_seq_data, homology):
    """Build the RNAnet.py command line that recomputes only the chain ``label``."""
    command = ["python3.8", "RNAnet.py", "--3d-folder", path_to_3D_data, "--seq-folder", path_to_seq_data, "-r", "20.0"]
    if not homology:
        command.append("--no-homology")
    command += ["--extract", "--only", label]
    return command


def _recompute_chain(label, path_to_3D_data, path_to_seq_data):
    """Delete the files and database rows of one chain, then re-run RNANet on it.

    Args:
        label: chain label made of at least three '_'-separated fields; the
            first is used as the structure id and the third as the chain name.
            A '-' anywhere in the label selects the homology code path.
        path_to_3D_data: folder holding ``rna_mapped_to_Rfam/`` and ``datapoints/``.
        path_to_seq_data: folder forwarded to RNANet as ``--seq-folder``.

    Raises:
        ValueError: if ``label`` has fewer than three '_'-separated fields.
    """
    print()
    print()
    print()
    print()

    # Parse the label first, so that a malformed one fails before anything is deleted.
    fields = label.split('_')
    if len(fields) < 3:
        raise ValueError(f"malformed chain label {label!r}: expected at least 3 '_'-separated fields")
    structure, chain = fields[0], fields[2]
    homology = ('-' in label)

    # Remove the datapoints files and 3D files
    subprocess.run(["rm", '-f', path_to_3D_data + f"/rna_mapped_to_Rfam/{label}.cif"])
    files = [f for f in os.listdir(path_to_3D_data + "/datapoints") if label in f]
    for f in files:
        subprocess.run(["rm", '-f', path_to_3D_data + f"/datapoints/{f}"])

    if homology:
        # The RFAM families this chain has been mapped onto, read from the
        # second '.'-separated field of each matching datapoints file name.
        families = [f.split('.')[1] for f in files]
    else:
        # A single deletion, targeting the row that has no family.
        families = [None]

    # Delete the chain from the database, and the associated nucleotides and re_mappings, using foreign keys
    for fam in families:
        command = _delete_chain_command(structure, chain, fam)
        print(' '.join(command))
        subprocess.run(command)

    # Re-run RNANet
    command = _rnanet_command(label, path_to_3D_data, path_to_seq_data, homology)
    print('\n', ' '.join(command), '\n')
    subprocess.run(command)


def main(argv=None):
    """Recompute every chain listed in ``problems``.

    Args:
        argv: argument vector laid out like ``sys.argv`` (program name, path to
            the 3D data, path to the sequence data); defaults to ``sys.argv``.
            Arguments beyond the first two paths are ignored.

    Returns:
        0 once all chains have been processed, 2 when the two paths are missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        program = argv[0] if argv else "recompute_some_chains.py"
        print(f"Usage: {program} <path_to_3D_data> <path_to_seq_data>", file=sys.stderr)
        return 2

    path_to_3D_data = argv[1]
    path_to_seq_data = argv[2]
    for p in problems:
        _recompute_chain(p, path_to_3D_data, path_to_seq_data)

    # run statistics
    # NOTE: the original script only carries this placeholder comment; no
    # statistics step is implemented here.
    return 0


if __name__ == "__main__":
    sys.exit(main())