Nathalie BERNARD

Ajout de fonctions pour faire une liste des motifs qui ne match qu'avec leurs séquence d'origine

......@@ -17,7 +17,7 @@ That script will remove from the library all the pattern that match ONLY with th
vector<string> get_list_pdb_benchmark(const string& benchmark) {
ifstream bm(benchmark);
fstream bm(benchmark);
vector<string> list_pdb;
if (bm.is_open()) {
string name;
......@@ -27,7 +27,7 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) {
while (getline(bm, name)) {
int size = name.size();
name = name.substr(5,size-8);
name = name.substr(5,size-6);
list_pdb.push_back(name);
getline(bm, sequence);
......@@ -45,18 +45,20 @@ string trim(string str) {
return str;
}
bool find_id_pattern(string& pdb_pattern, const string& benchmark) {
string find_id_pattern(string& pdb_pattern, const string& benchmark) {
vector<string> l = get_list_pdb_benchmark(benchmark);
for (string pdb_bm : l) {
if (!pdb_bm.compare(pdb_pattern)) {
return true;
int size = pdb_bm.size();
string cmp = pdb_bm.substr(0, size-2);
if (!cmp.compare(pdb_pattern)) {
return pdb_bm;
}
}
return false;
return string();
}
vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) {
std::ifstream lib(bibli);
ifstream lib(bibli);
json js = json::parse(lib);
vector<pair<string, string>> association;
......@@ -71,9 +73,10 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar
ostringstream stream;
stream << js[id][field][i];
string pdb = trim(stream.str());
if (find_id_pattern(pdb, benchmark)) {
string pdb_complete = find_id_pattern(pdb, benchmark);
if (!(pdb_complete.empty())) {
pair<string, string> p;
p.first = pdb;
p.first = pdb_complete;
p.second = id;
association.push_back(p);
}
......@@ -81,20 +84,97 @@ vector<pair<string, string>> find_id(const string& bibli, const string& benchmar
}
}
}
lib.close();
return association;
}
bool does_it_match(const string& result, const string& id_motif) {
ifstream f_res(result);
if (f_res.is_open()) {
string name;
string seq;
string struc;
string contacts;
getline(f_res, name);
getline(f_res, seq);
while (getline(f_res, struc)) {
string motif_json = "JSON" + id_motif + " +";
if(struc.find(motif_json, 0) != string::npos) {
return true;
}
motif_json = "JSON" + id_motif + "\n";
if(struc.find(motif_json, 0) != string::npos) {
return true;
}
getline(f_res,contacts);
}
f_res.close();
}
return false;
}
vector<string> select_not_motif(const string& bibli, const string& benchmark) {
vector<string> selection;
vector<pair<string, string>> association = find_id(bibli, benchmark);
vector<string> list_bm = get_list_pdb_benchmark(benchmark);
string path_begin = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_";
string path_MFE_F = ".json_pmF_MEA";
for (pair<string, string> p : association) {
string id_motif = p.second;
selection.push_back(id_motif);
}
for (pair<string, string> p : association) {
cout << p.first << ", " << p.second << endl;
}
cout << "size: " << association.size() << endl;
for (string pdb : list_bm) {
string path_result = path_begin + pdb + path_MFE_F;
for (pair<string,string> pair : association) {
if (pair.first.substr(0, pair.first.size()-2).compare(pdb.substr(0, pdb.size()-2)) != 0) {
bool test = does_it_match(path_result, pair.second);
if (test) {
//if (!(pair.second.compare("954"))) { cout << "p1: " << pair.first << "pdb: " << pdb << endl;}
auto position = find(selection.begin(), selection.end(), pair.second);
if (position != selection.end()) {
int index = position - selection.begin();
selection.erase(selection.begin() + index);
}
}
}
}
}
sort(selection.begin(), selection.end() );
selection.erase(unique(selection.begin(), selection.end() ), selection.end() );
cout << "size: " << selection.size() << endl;
return selection;
}
int main()
{
string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
vector<pair<string, string>> association = find_id(bibli, benchmark);
/*for (pair<string,string> p : association) {
/*vector<pair<string, string>> association = find_id(bibli, benchmark);
for (pair<string,string> p : association) {
cout << "<" << p.first << ", " << p.second << ">" << endl;
}*/
vector<string> selection = select_not_motif(bibli, benchmark);
for (string str : selection) {
cout << str << ", ";
}
cout << endl;
/*string result = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_1U6P_B.json_pmF_MEA";
bool test = does_it_match(result, "150");
cout << "test : " << test << endl;*/
return 0;
}
\ No newline at end of file
......