selecting_id.cpp
2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <iostream>
#include <sstream>
#include <fstream>
#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
#include <typeinfo>
#include <set>
#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;
using json = nlohmann::json;
/*
This script removes from the library all the patterns that match ONLY the sequence they come from.
*/
/**
 * Reads the identifiers listed in a benchmark file.
 *
 * The file is consumed as consecutive 4-line records: one header line followed
 * by three lines that are read and discarded. The identifier kept for a record
 * is its header line without its first 5 and its last 3 characters.
 *
 * Lines that cannot be a valid header no longer abort the program:
 *  - a blank line found where a header is expected (typically at the end of
 *    the file) is skipped on its own;
 *  - a non-blank header shorter than 8 characters makes the whole record be
 *    skipped, because it cannot hold both the prefix and the suffix.
 *
 * @param benchmark path of the benchmark file.
 * @return the identifiers in file order; empty if the file cannot be opened.
 */
vector<string> get_list_pdb_benchmark(const string& benchmark) {
    // Number of characters stripped at each end of a header line.
    const string::size_type prefix_len = 5;
    const string::size_type suffix_len = 3;

    vector<string> list_pdb;
    ifstream bm(benchmark);
    if (!bm.is_open()) {
        return list_pdb;
    }

    string name;
    string ignored;
    while (getline(bm, name)) {
        // A blank line is not a record header: do not consume a record for it.
        if (name.find_first_not_of(" \t\r") == string::npos) {
            continue;
        }

        // Guard the substr() call: with fewer than 8 characters it would either
        // throw std::out_of_range or be given a wrapped-around length.
        if (name.size() >= prefix_len + suffix_len) {
            list_pdb.push_back(name.substr(prefix_len, name.size() - prefix_len - suffix_len));
        }

        // Skip the three remaining lines of the record; a failed read at the
        // end of the file is harmless, the loop condition stops right after.
        getline(bm, ignored);
        getline(bm, ignored);
        getline(bm, ignored);
    }
    return list_pdb;
}
/**
 * Removes the first and the last character of a string (for instance the
 * double quotes surrounding a serialized JSON string). The characters are
 * removed unconditionally, whatever they are.
 *
 * @param str the string to shorten.
 * @return str without its first and last character; an empty string when str
 *         has fewer than two characters (an empty input used to throw
 *         std::out_of_range).
 */
string trim(string str) {
    if (str.size() < 2) {
        return string();
    }
    return str.substr(1, str.size() - 2);
}
/*
Tells whether pdb_pattern is one of the identifiers listed in the benchmark file.
The list is rebuilt with get_list_pdb_benchmark(benchmark) on every call and the
comparison is an exact string equality.
Returns true when the identifier is found, false otherwise.
*/
bool find_id_pattern(string& pdb_pattern, const string& benchmark) {
    const vector<string> benchmark_ids = get_list_pdb_benchmark(benchmark);
    return std::find(benchmark_ids.begin(), benchmark_ids.end(), pdb_pattern) != benchmark_ids.end();
}
vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) {
std::ifstream lib(bibli);
json js = json::parse(lib);
vector<pair<string, string>> association;
for (auto it = js.begin(); it != js.end(); ++it) {
string id = it.key();
for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
string field = it2.key();
if (!field.compare("pdb")) {
int n = js[id][field].size();
for (int i = 0; i < n ; i++) {
ostringstream stream;
stream << js[id][field][i];
string pdb = trim(stream.str());
if (find_id_pattern(pdb, benchmark)) {
pair<string, string> p;
p.first = pdb;
p.second = id;
association.push_back(p);
}
}
}
}
}
lib.close();
return association;
}
/*
Entry point: builds the <pdb identifier, motif id> associations with find_id().

Usage: selecting_id [motif_library.json [benchmark.dbn]]
 - argv[1], when given, replaces the default path of the JSON motif library;
 - argv[2], when given, replaces the default path of the benchmark file.
Without arguments the historical hard-coded paths are used.

The result is currently not printed (the debug output below is disabled).
*/
int main(int argc, char* argv[])
{
    string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
    string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";

    // Optional command-line overrides so the program can run on another machine.
    if (argc > 1) {
        bibli = argv[1];
    }
    if (argc > 2) {
        benchmark = argv[2];
    }

    vector<pair<string, string>> association = find_id(bibli, benchmark);
    /*for (pair<string,string> p : association) {
        cout << "<" << p.first << ", " << p.second << ">" << endl;
    }*/
    return 0;
}