Ajout du début d'un script qui permettrait de retirer les motifs qui ne matchent qu'avec leur séquence d'origine
Showing
1 changed file
with
100 additions
and
0 deletions
cppsrc/Scripts/selecting_id.cpp
0 → 100644
1 | +#include <iostream> | ||
2 | +#include <sstream> | ||
3 | +#include <fstream> | ||
4 | +#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp" | ||
5 | +#include <typeinfo> | ||
6 | +#include <set> | ||
7 | +#include <algorithm> | ||
8 | +#include <cstdio> | ||
9 | +#include <vector> | ||
10 | + | ||
11 | +using namespace std; | ||
12 | +using json = nlohmann::json; | ||
13 | + | ||
14 | +/* | ||
15 | +This script will remove from the library every pattern that matches ONLY the sequence it comes from. | ||
16 | +*/ | ||
17 | + | ||
/**
 * Collects one identifier per record from a benchmark file.
 *
 * The file is consumed as consecutive 4-line records: a header line followed
 * by three more lines (called sequence, structure and contacts in the original
 * code) that are read and discarded. The identifier is the header line with
 * its first 5 and last 3 characters removed.
 *
 * Lines too short to hold both the 5-character prefix and the 3-character
 * suffix (for example a trailing blank line) are skipped on their own, without
 * consuming the following lines, instead of making substr() throw
 * std::out_of_range.
 *
 * NOTE(review): with CRLF line endings the trailing '\r' would count as one of
 * the 3 suffix characters -- confirm the benchmark file uses LF endings.
 *
 * @param benchmark path of the benchmark file.
 * @return the identifiers in file order; empty if the file cannot be opened.
 */
std::vector<std::string> get_list_pdb_benchmark(const std::string& benchmark) {
    // Number of characters dropped at the start / end of every header line.
    const std::string::size_type prefix_len = 5;
    const std::string::size_type suffix_len = 3;

    std::vector<std::string> list_pdb;

    std::ifstream bm(benchmark);
    if (!bm.is_open()) {
        return list_pdb;
    }

    std::string name;
    std::string skipped;  // scratch buffer for the three non-header lines
    while (std::getline(bm, name)) {
        if (name.size() < prefix_len + suffix_len) {
            // Not a usable header: nothing can be extracted from it.
            continue;
        }
        list_pdb.push_back(name.substr(prefix_len, name.size() - prefix_len - suffix_len));

        // Skip the rest of the record. If the file ends early the stream goes
        // into a failed state and the enclosing loop terminates.
        for (int i = 0; i < 3; ++i) {
            std::getline(bm, skipped);
        }
    }
    return list_pdb;  // the ifstream closes itself when it goes out of scope
}
41 | + | ||
/**
 * Removes the first and the last character of a string (for example the
 * quotes surrounding a serialized JSON string).
 *
 * Despite its name this function does not strip whitespace.
 *
 * @param str the string to shorten.
 * @return str without its first and last character; an empty string when str
 *         has fewer than 2 characters (the unguarded substr() call would throw
 *         std::out_of_range on an empty string).
 */
std::string trim(std::string str) {
    if (str.size() < 2) {
        return std::string();
    }
    return str.substr(1, str.size() - 2);
}
47 | + | ||
48 | +bool find_id_pattern(string& pdb_pattern, const string& benchmark) { | ||
49 | + vector<string> l = get_list_pdb_benchmark(benchmark); | ||
50 | + for (string pdb_bm : l) { | ||
51 | + if (!pdb_bm.compare(pdb_pattern)) { | ||
52 | + return true; | ||
53 | + } | ||
54 | + } | ||
55 | + return false; | ||
56 | +} | ||
57 | + | ||
58 | +vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) { | ||
59 | + std::ifstream lib(bibli); | ||
60 | + json js = json::parse(lib); | ||
61 | + | ||
62 | + vector<pair<string, string>> association; | ||
63 | + | ||
64 | + for (auto it = js.begin(); it != js.end(); ++it) { | ||
65 | + string id = it.key(); | ||
66 | + for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { | ||
67 | + string field = it2.key(); | ||
68 | + if (!field.compare("pdb")) { | ||
69 | + int n = js[id][field].size(); | ||
70 | + for (int i = 0; i < n ; i++) { | ||
71 | + ostringstream stream; | ||
72 | + stream << js[id][field][i]; | ||
73 | + string pdb = trim(stream.str()); | ||
74 | + if (find_id_pattern(pdb, benchmark)) { | ||
75 | + pair<string, string> p; | ||
76 | + p.first = pdb; | ||
77 | + p.second = id; | ||
78 | + association.push_back(p); | ||
79 | + } | ||
80 | + } | ||
81 | + } | ||
82 | + } | ||
83 | + } | ||
84 | + | ||
85 | + lib.close(); | ||
86 | + return association; | ||
87 | +} | ||
88 | + | ||
89 | +int main() | ||
90 | +{ | ||
91 | + string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; | ||
92 | + string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; | ||
93 | + | ||
94 | + vector<pair<string, string>> association = find_id(bibli, benchmark); | ||
95 | + /*for (pair<string,string> p : association) { | ||
96 | + cout << "<" << p.first << ", " << p.second << ">" << endl; | ||
97 | + }*/ | ||
98 | + | ||
99 | + return 0; | ||
100 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment