Ajout de fonctions pour faire une liste des motifs qui ne matchent qu'avec leur séquence d'origine
Showing
1 changed file
with
92 additions
and
12 deletions
... | @@ -17,7 +17,7 @@ That script will remove from the library all the pattern that match ONLY with th | ... | @@ -17,7 +17,7 @@ That script will remove from the library all the pattern that match ONLY with th |
17 | 17 | ||
18 | vector<string> get_list_pdb_benchmark(const string& benchmark) { | 18 | vector<string> get_list_pdb_benchmark(const string& benchmark) { |
19 | 19 | ||
20 | - ifstream bm(benchmark); | 20 | + fstream bm(benchmark); |
21 | vector<string> list_pdb; | 21 | vector<string> list_pdb; |
22 | if (bm.is_open()) { | 22 | if (bm.is_open()) { |
23 | string name; | 23 | string name; |
... | @@ -27,7 +27,7 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) { | ... | @@ -27,7 +27,7 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) { |
27 | 27 | ||
28 | while (getline(bm, name)) { | 28 | while (getline(bm, name)) { |
29 | int size = name.size(); | 29 | int size = name.size(); |
30 | - name = name.substr(5,size-8); | 30 | + name = name.substr(5,size-6); |
31 | list_pdb.push_back(name); | 31 | list_pdb.push_back(name); |
32 | 32 | ||
33 | getline(bm, sequence); | 33 | getline(bm, sequence); |
... | @@ -45,18 +45,20 @@ string trim(string str) { | ... | @@ -45,18 +45,20 @@ string trim(string str) { |
45 | return str; | 45 | return str; |
46 | } | 46 | } |
47 | 47 | ||
48 | -bool find_id_pattern(string& pdb_pattern, const string& benchmark) { | 48 | +string find_id_pattern(string& pdb_pattern, const string& benchmark) { |
49 | vector<string> l = get_list_pdb_benchmark(benchmark); | 49 | vector<string> l = get_list_pdb_benchmark(benchmark); |
50 | for (string pdb_bm : l) { | 50 | for (string pdb_bm : l) { |
51 | - if (!pdb_bm.compare(pdb_pattern)) { | 51 | + int size = pdb_bm.size(); |
52 | - return true; | 52 | + string cmp = pdb_bm.substr(0, size-2); |
53 | + if (!cmp.compare(pdb_pattern)) { | ||
54 | + return pdb_bm; | ||
53 | } | 55 | } |
54 | } | 56 | } |
55 | - return false; | 57 | + return string(); |
56 | } | 58 | } |
57 | 59 | ||
58 | vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) { | 60 | vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) { |
59 | - std::ifstream lib(bibli); | 61 | + ifstream lib(bibli); |
60 | json js = json::parse(lib); | 62 | json js = json::parse(lib); |
61 | 63 | ||
62 | vector<pair<string, string>> association; | 64 | vector<pair<string, string>> association; |
... | @@ -71,9 +73,10 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar | ... | @@ -71,9 +73,10 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar |
71 | ostringstream stream; | 73 | ostringstream stream; |
72 | stream << js[id][field][i]; | 74 | stream << js[id][field][i]; |
73 | string pdb = trim(stream.str()); | 75 | string pdb = trim(stream.str()); |
74 | - if (find_id_pattern(pdb, benchmark)) { | 76 | + string pdb_complete = find_id_pattern(pdb, benchmark); |
77 | + if (!(pdb_complete.empty())) { | ||
75 | pair<string, string> p; | 78 | pair<string, string> p; |
76 | - p.first = pdb; | 79 | + p.first = pdb_complete; |
77 | p.second = id; | 80 | p.second = id; |
78 | association.push_back(p); | 81 | association.push_back(p); |
79 | } | 82 | } |
... | @@ -81,20 +84,97 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar | ... | @@ -81,20 +84,97 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar |
81 | } | 84 | } |
82 | } | 85 | } |
83 | } | 86 | } |
84 | - | ||
85 | lib.close(); | 87 | lib.close(); |
86 | return association; | 88 | return association; |
87 | } | 89 | } |
88 | 90 | ||
/**
 * Tells whether a motif was inserted in any structure of a result file.
 *
 * The first two lines of the file (name and sequence) are skipped. The
 * remaining lines are read in pairs: a structure line, which is searched
 * for the motif, followed by a contacts line, which is ignored.
 *
 * A structure line matches when it contains "JSON<id_motif>" either
 * followed by " +" (another motif comes after it) or as the very end of
 * the line (it is the last motif listed). Requiring one of these two
 * terminators prevents id "15" from matching "JSON150".
 *
 * @param result    Path of the result file to scan.
 * @param id_motif  Identifier of the motif, without the "JSON" prefix.
 * @return true if at least one structure line references the motif;
 *         false otherwise, including when the file cannot be opened.
 */
bool does_it_match(const std::string& result, const std::string& id_motif) {
    std::ifstream f_res(result);
    if (!f_res.is_open()) {
        return false;
    }

    // Both patterns are loop-invariant, so build them once.
    const std::string tag = "JSON" + id_motif;
    const std::string tag_followed = tag + " +";

    std::string name;
    std::string seq;
    std::string struc;
    std::string contacts;

    std::getline(f_res, name);
    std::getline(f_res, seq);
    while (std::getline(f_res, struc)) {
        if (struc.find(tag_followed) != std::string::npos) {
            return true;
        }
        // getline() strips the '\n', so "last motif on the line" has to be
        // detected as a suffix rather than by searching for tag + "\n".
        if (struc.size() >= tag.size() &&
            struc.compare(struc.size() - tag.size(), tag.size(), tag) == 0) {
            return true;
        }
        std::getline(f_res, contacts);
    }
    // The stream is closed by its destructor on every return path.
    return false;
}
116 | + | ||
117 | +vector<string> select_not_motif(const string& bibli, const string& benchmark) { | ||
118 | + vector<string> selection; | ||
119 | + vector<pair<string, string>> association = find_id(bibli, benchmark); | ||
120 | + vector<string> list_bm = get_list_pdb_benchmark(benchmark); | ||
121 | + | ||
122 | + string path_begin = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_"; | ||
123 | + string path_MFE_F = ".json_pmF_MEA"; | ||
124 | + | ||
125 | + for (pair<string, string> p : association) { | ||
126 | + string id_motif = p.second; | ||
127 | + selection.push_back(id_motif); | ||
128 | + } | ||
129 | + for (pair<string, string> p : association) { | ||
130 | + cout << p.first << ", " << p.second << endl; | ||
131 | + } | ||
132 | + cout << "size: " << association.size() << endl; | ||
133 | + | ||
134 | + for (string pdb : list_bm) { | ||
135 | + string path_result = path_begin + pdb + path_MFE_F; | ||
136 | + for (pair<string,string> pair : association) { | ||
137 | + if (pair.first.substr(0, pair.first.size()-2).compare(pdb.substr(0, pdb.size()-2)) != 0) { | ||
138 | + bool test = does_it_match(path_result, pair.second); | ||
139 | + | ||
140 | + if (test) { | ||
141 | + //if (!(pair.second.compare("954"))) { cout << "p1: " << pair.first << "pdb: " << pdb << endl;} | ||
142 | + auto position = find(selection.begin(), selection.end(), pair.second); | ||
143 | + if (position != selection.end()) { | ||
144 | + int index = position - selection.begin(); | ||
145 | + selection.erase(selection.begin() + index); | ||
146 | + } | ||
147 | + } | ||
148 | + } | ||
149 | + } | ||
150 | + } | ||
151 | + sort(selection.begin(), selection.end() ); | ||
152 | + selection.erase(unique(selection.begin(), selection.end() ), selection.end() ); | ||
153 | + | ||
154 | + cout << "size: " << selection.size() << endl; | ||
155 | + | ||
156 | + return selection; | ||
157 | +} | ||
158 | + | ||
89 | int main() | 159 | int main() |
90 | { | 160 | { |
91 | string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; | 161 | string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; |
92 | string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; | 162 | string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; |
93 | 163 | ||
94 | - vector<pair<string, string>> association = find_id(bibli, benchmark); | 164 | + /*vector<pair<string, string>> association = find_id(bibli, benchmark); |
95 | - /*for (pair<string,string> p : association) { | 165 | + for (pair<string,string> p : association) { |
96 | cout << "<" << p.first << ", " << p.second << ">" << endl; | 166 | cout << "<" << p.first << ", " << p.second << ">" << endl; |
97 | }*/ | 167 | }*/ |
98 | 168 | ||
169 | + vector<string> selection = select_not_motif(bibli, benchmark); | ||
170 | + for (string str : selection) { | ||
171 | + cout << str << ", "; | ||
172 | + } | ||
173 | + cout << endl; | ||
174 | + | ||
175 | + /*string result = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_1U6P_B.json_pmF_MEA"; | ||
176 | + bool test = does_it_match(result, "150"); | ||
177 | + cout << "test : " << test << endl;*/ | ||
178 | + | ||
99 | return 0; | 179 | return 0; |
100 | } | 180 | } |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment