Nathalie BERNARD

Ajout du début d'un script qui permettrait de retirer les motifs qui ne matchent qu'…

…avec leur séquence d'origine
1 +#include <iostream>
2 +#include <sstream>
3 +#include <fstream>
4 +#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
5 +#include <typeinfo>
6 +#include <set>
7 +#include <algorithm>
8 +#include <cstdio>
9 +#include <vector>
10 +
11 +using namespace std;
12 +using json = nlohmann::json;
13 +
14 +/*
15 +This script will remove from the library all the patterns that match ONLY the sequence they come from.
16 +*/
17 +
/**
 * Collects the record identifiers listed in a benchmark file.
 *
 * The file is consumed four lines at a time: one header line, then three
 * further lines (read by the original code as sequence, structure and
 * contacts) whose content is ignored here. The identifier kept for a record
 * is its header line without the first 5 and the last 3 characters.
 *
 * A header shorter than 8 characters cannot hold both the stripped prefix and
 * the stripped suffix (e.g. a trailing blank line); such a record is skipped
 * instead of letting std::string::substr throw std::out_of_range.
 *
 * @param benchmark Path of the benchmark file.
 * @return Identifiers in file order; empty when the file cannot be opened.
 */
std::vector<std::string> get_list_pdb_benchmark(const std::string& benchmark) {
    // Number of characters dropped at the start and at the end of a header.
    constexpr std::size_t kPrefixLength = 5;
    constexpr std::size_t kSuffixLength = 3;
    // Lines following each header that belong to the same record.
    constexpr int kPayloadLines = 3;

    std::vector<std::string> list_pdb;

    std::ifstream bm(benchmark);
    if (!bm.is_open()) {
        return list_pdb;
    }

    std::string name;
    std::string ignored;
    while (std::getline(bm, name)) {
        if (name.size() >= kPrefixLength + kSuffixLength) {
            list_pdb.push_back(
                name.substr(kPrefixLength, name.size() - kPrefixLength - kSuffixLength));
        }

        // Always consume the rest of the record so the next iteration starts
        // on a header line, even when the current header was skipped.
        for (int i = 0; i < kPayloadLines; ++i) {
            std::getline(bm, ignored);
        }
    }
    // The stream is closed by its destructor.
    return list_pdb;
}
41 +
/**
 * Returns the input without its first and last character.
 *
 * find_id applies this to the text produced by streaming a JSON value, where
 * it removes the surrounding double quotes of a serialized JSON string.
 *
 * @param str Text to strip.
 * @return The inner part of str; an empty string when str has fewer than two
 *         characters (an empty input previously made substr throw
 *         std::out_of_range).
 */
std::string trim(std::string str) {
    if (str.size() < 2) {
        return std::string();
    }
    return str.substr(1, str.size() - 2);
}
47 +
48 +bool find_id_pattern(string& pdb_pattern, const string& benchmark) {
49 + vector<string> l = get_list_pdb_benchmark(benchmark);
50 + for (string pdb_bm : l) {
51 + if (!pdb_bm.compare(pdb_pattern)) {
52 + return true;
53 + }
54 + }
55 + return false;
56 +}
57 +
58 +vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) {
59 + std::ifstream lib(bibli);
60 + json js = json::parse(lib);
61 +
62 + vector<pair<string, string>> association;
63 +
64 + for (auto it = js.begin(); it != js.end(); ++it) {
65 + string id = it.key();
66 + for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
67 + string field = it2.key();
68 + if (!field.compare("pdb")) {
69 + int n = js[id][field].size();
70 + for (int i = 0; i < n ; i++) {
71 + ostringstream stream;
72 + stream << js[id][field][i];
73 + string pdb = trim(stream.str());
74 + if (find_id_pattern(pdb, benchmark)) {
75 + pair<string, string> p;
76 + p.first = pdb;
77 + p.second = id;
78 + association.push_back(p);
79 + }
80 + }
81 + }
82 + }
83 + }
84 +
85 + lib.close();
86 + return association;
87 +}
88 +
89 +int main()
90 +{
91 + string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
92 + string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
93 +
94 + vector<pair<string, string>> association = find_id(bibli, benchmark);
95 + /*for (pair<string,string> p : association) {
96 + cout << "<" << p.first << ", " << p.second << ">" << endl;
97 + }*/
98 +
99 + return 0;
100 +}
...\ No newline at end of file ...\ No newline at end of file