Louis BECQUEY

Statistics over the produced data

...@@ -3,107 +3,11 @@ nohup.out ...@@ -3,107 +3,11 @@ nohup.out
3 jobstats.csv 3 jobstats.csv
4 log_of_the_run.sh 4 log_of_the_run.sh
5 5
6 -# Byte-compiled / optimized / DLL files 6 +# temporary results files
7 -__pycache__/ 7 +data/*.npy
8 -*.py[cod] 8 +data/*.npz
9 -*$py.class 9 +data/olddata
10 - 10 +
11 -# C extensions 11 +# environment stuff
12 -*.so 12 +.vscode/
13 - 13 +*.pyc
14 -# Distribution / packaging
15 -.Python
16 -build/
17 -develop-eggs/
18 -dist/
19 -downloads/
20 -eggs/
21 -.eggs/
22 -lib/
23 -lib64/
24 -parts/
25 -sdist/
26 -var/
27 -wheels/
28 -*.egg-info/
29 -.installed.cfg
30 -*.egg
31 -MANIFEST
32 -
33 -# PyInstaller
34 -# Usually these files are written by a python script from a template
35 -# before PyInstaller builds the exe, so as to inject date/other infos into it.
36 -*.manifest
37 -*.spec
38 -
39 -# Installer logs
40 -pip-log.txt
41 -pip-delete-this-directory.txt
42 -
43 -# Unit test / coverage reports
44 -htmlcov/
45 -.tox/
46 -.coverage
47 -.coverage.*
48 -.cache
49 -nosetests.xml
50 -coverage.xml
51 -*.cover
52 -.hypothesis/
53 -.pytest_cache/
54 -
55 -# Translations
56 -*.mo
57 -*.pot
58 -
59 -# Django stuff:
60 -*.log
61 -local_settings.py
62 -db.sqlite3
63 -
64 -# Flask stuff:
65 -instance/
66 -.webassets-cache
67 -
68 -# Scrapy stuff:
69 -.scrapy
70 -
71 -# Sphinx documentation
72 -docs/_build/
73 -
74 -# PyBuilder
75 -target/
76 -
77 -# Jupyter Notebook
78 -.ipynb_checkpoints
79 -
80 -# pyenv
81 -.python-version
82 -
83 -# celery beat schedule file
84 -celerybeat-schedule
85 -
86 -# SageMath parsed files
87 -*.sage.py
88 -
89 -# Environments
90 -.env
91 -.venv
92 -env/
93 -venv/
94 -ENV/
95 -env.bak/
96 -venv.bak/
97 -
98 -# Spyder project settings
99 -.spyderproject
100 -.spyproject
101 -
102 -# Rope project settings
103 -.ropeproject
104 -
105 -# mkdocs documentation
106 -/site
107 -
108 -# mypy
109 -.mypy_cache/
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -916,7 +916,7 @@ def execute_joblist(fulljoblist, printstats=False): ...@@ -916,7 +916,7 @@ def execute_joblist(fulljoblist, printstats=False):
916 916
917 if printstats: 917 if printstats:
918 # Write statistics in a file (header here) 918 # Write statistics in a file (header here)
919 - f = open("jobstats.csv", "w") 919 + f = open("data/jobstats.csv", "w")
920 f.write("label,comp_time,max_mem\n") 920 f.write("label,comp_time,max_mem\n")
921 f.close() 921 f.close()
922 922
...@@ -948,7 +948,7 @@ def execute_joblist(fulljoblist, printstats=False): ...@@ -948,7 +948,7 @@ def execute_joblist(fulljoblist, printstats=False):
948 mems = [ r[1] for r in raw_results ] 948 mems = [ r[1] for r in raw_results ]
949 949
950 # Write them to file 950 # Write them to file
951 - f = open("jobstats.csv", "a") 951 + f = open("data/jobstats.csv", "a")
952 for j, t, m in zip(bunch, times, mems): 952 for j, t, m in zip(bunch, times, mems):
953 j.comp_time = t 953 j.comp_time = t
954 j.max_mem = m 954 j.max_mem = m
...@@ -1636,7 +1636,7 @@ if __name__ == "__main__": ...@@ -1636,7 +1636,7 @@ if __name__ == "__main__":
1636 n_pdb = [ len(rfam_acc_to_download[f]) for f in fam_stats["rfam_acc"] ] 1636 n_pdb = [ len(rfam_acc_to_download[f]) for f in fam_stats["rfam_acc"] ]
1637 fam_stats["n_pdb_seqs"] = n_pdb 1637 fam_stats["n_pdb_seqs"] = n_pdb
1638 fam_stats["total_seqs"] = fam_stats["n_seq"] + fam_stats["n_pdb_seqs"] 1638 fam_stats["total_seqs"] = fam_stats["n_seq"] + fam_stats["n_pdb_seqs"]
1639 - fam_stats.to_csv(path_to_seq_data + "realigned/statistics.csv") 1639 + fam_stats.to_csv(path_to_seq_data + "data/statistics.csv")
1640 # print the stats 1640 # print the stats
1641 for f in fam_list: 1641 for f in fam_list:
1642 line = fam_stats[fam_stats["rfam_acc"]==f] 1642 line = fam_stats[fam_stats["rfam_acc"]==f]
......
This diff is collapsed. Click to expand it.