Nathalie BERNARD

Ajout de fonctions pour faire une liste des motifs qui ne matchent qu'avec leur séquence d'origine

...@@ -17,7 +17,7 @@ That script will remove from the library all the pattern that match ONLY with th ...@@ -17,7 +17,7 @@ That script will remove from the library all the pattern that match ONLY with th
17 17
18 vector<string> get_list_pdb_benchmark(const string& benchmark) { 18 vector<string> get_list_pdb_benchmark(const string& benchmark) {
19 19
20 - ifstream bm(benchmark); 20 + fstream bm(benchmark);
21 vector<string> list_pdb; 21 vector<string> list_pdb;
22 if (bm.is_open()) { 22 if (bm.is_open()) {
23 string name; 23 string name;
...@@ -27,7 +27,7 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) { ...@@ -27,7 +27,7 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) {
27 27
28 while (getline(bm, name)) { 28 while (getline(bm, name)) {
29 int size = name.size(); 29 int size = name.size();
30 - name = name.substr(5,size-8); 30 + name = name.substr(5,size-6);
31 list_pdb.push_back(name); 31 list_pdb.push_back(name);
32 32
33 getline(bm, sequence); 33 getline(bm, sequence);
...@@ -45,18 +45,20 @@ string trim(string str) { ...@@ -45,18 +45,20 @@ string trim(string str) {
45 return str; 45 return str;
46 } 46 }
47 47
48 -bool find_id_pattern(string& pdb_pattern, const string& benchmark) { 48 +string find_id_pattern(string& pdb_pattern, const string& benchmark) {
49 vector<string> l = get_list_pdb_benchmark(benchmark); 49 vector<string> l = get_list_pdb_benchmark(benchmark);
50 for (string pdb_bm : l) { 50 for (string pdb_bm : l) {
51 - if (!pdb_bm.compare(pdb_pattern)) { 51 + int size = pdb_bm.size();
52 - return true; 52 + string cmp = pdb_bm.substr(0, size-2);
53 + if (!cmp.compare(pdb_pattern)) {
54 + return pdb_bm;
53 } 55 }
54 } 56 }
55 - return false; 57 + return string();
56 } 58 }
57 59
58 vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) { 60 vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) {
59 - std::ifstream lib(bibli); 61 + ifstream lib(bibli);
60 json js = json::parse(lib); 62 json js = json::parse(lib);
61 63
62 vector<pair<string, string>> association; 64 vector<pair<string, string>> association;
...@@ -71,9 +73,10 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar ...@@ -71,9 +73,10 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar
71 ostringstream stream; 73 ostringstream stream;
72 stream << js[id][field][i]; 74 stream << js[id][field][i];
73 string pdb = trim(stream.str()); 75 string pdb = trim(stream.str());
74 - if (find_id_pattern(pdb, benchmark)) { 76 + string pdb_complete = find_id_pattern(pdb, benchmark);
77 + if (!(pdb_complete.empty())) {
75 pair<string, string> p; 78 pair<string, string> p;
76 - p.first = pdb; 79 + p.first = pdb_complete;
77 p.second = id; 80 p.second = id;
78 association.push_back(p); 81 association.push_back(p);
79 } 82 }
...@@ -81,20 +84,97 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar ...@@ -81,20 +84,97 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar
81 } 84 }
82 } 85 }
83 } 86 }
84 -
85 lib.close(); 87 lib.close();
86 return association; 88 return association;
87 } 89 }
88 90
91 +bool does_it_match(const string& result, const string& id_motif) {
92 + ifstream f_res(result);
93 + if (f_res.is_open()) {
94 + string name;
95 + string seq;
96 + string struc;
97 + string contacts;
98 +
99 + getline(f_res, name);
100 + getline(f_res, seq);
101 + while (getline(f_res, struc)) {
102 + string motif_json = "JSON" + id_motif + " +";
103 + if(struc.find(motif_json, 0) != string::npos) {
104 + return true;
105 + }
106 + motif_json = "JSON" + id_motif + "\n";
107 + if(struc.find(motif_json, 0) != string::npos) {
108 + return true;
109 + }
110 + getline(f_res,contacts);
111 + }
112 + f_res.close();
113 + }
114 + return false;
115 +}
116 +
117 +vector<string> select_not_motif(const string& bibli, const string& benchmark) {
118 + vector<string> selection;
119 + vector<pair<string, string>> association = find_id(bibli, benchmark);
120 + vector<string> list_bm = get_list_pdb_benchmark(benchmark);
121 +
122 + string path_begin = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_";
123 + string path_MFE_F = ".json_pmF_MEA";
124 +
125 + for (pair<string, string> p : association) {
126 + string id_motif = p.second;
127 + selection.push_back(id_motif);
128 + }
129 + for (pair<string, string> p : association) {
130 + cout << p.first << ", " << p.second << endl;
131 + }
132 + cout << "size: " << association.size() << endl;
133 +
134 + for (string pdb : list_bm) {
135 + string path_result = path_begin + pdb + path_MFE_F;
136 + for (pair<string,string> pair : association) {
137 + if (pair.first.substr(0, pair.first.size()-2).compare(pdb.substr(0, pdb.size()-2)) != 0) {
138 + bool test = does_it_match(path_result, pair.second);
139 +
140 + if (test) {
141 + //if (!(pair.second.compare("954"))) { cout << "p1: " << pair.first << "pdb: " << pdb << endl;}
142 + auto position = find(selection.begin(), selection.end(), pair.second);
143 + if (position != selection.end()) {
144 + int index = position - selection.begin();
145 + selection.erase(selection.begin() + index);
146 + }
147 + }
148 + }
149 + }
150 + }
151 + sort(selection.begin(), selection.end() );
152 + selection.erase(unique(selection.begin(), selection.end() ), selection.end() );
153 +
154 + cout << "size: " << selection.size() << endl;
155 +
156 + return selection;
157 +}
158 +
89 int main() 159 int main()
90 { 160 {
91 string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; 161 string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
92 string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; 162 string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
93 163
94 - vector<pair<string, string>> association = find_id(bibli, benchmark); 164 + /*vector<pair<string, string>> association = find_id(bibli, benchmark);
95 - /*for (pair<string,string> p : association) { 165 + for (pair<string,string> p : association) {
96 cout << "<" << p.first << ", " << p.second << ">" << endl; 166 cout << "<" << p.first << ", " << p.second << ">" << endl;
97 }*/ 167 }*/
98 168
169 + vector<string> selection = select_not_motif(bibli, benchmark);
170 + for (string str : selection) {
171 + cout << str << ", ";
172 + }
173 + cout << endl;
174 +
175 + /*string result = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_1U6P_B.json_pmF_MEA";
176 + bool test = does_it_match(result, "150");
177 + cout << "test : " << test << endl;*/
178 +
99 return 0; 179 return 0;
100 } 180 }
...\ No newline at end of file ...\ No newline at end of file
......