Louis BECQUEY

merged branches

......@@ -11,29 +11,8 @@
using namespace std;
using json = nlohmann::json;
// Returns true when every element of s1 is also present in s2 (s1 is a subset of s2),
// false otherwise. An empty s1 is contained in any s2.
bool is_contains_set(std::set<std::string>& s1, std::set<std::string>& s2) {
    // A set holding more elements than s2 cannot be one of its subsets.
    if (s1.size() > s2.size()) {
        return false;
    }
    // Iterate by const reference so that no string is copied for the lookup.
    for (const std::string& s : s1) {
        if (s2.count(s) == 0) {
            return false;
        }
    }
    return true;
}
//Returns true if the first sequence seq1 is included in the second sequence seq2,
//otherwise returns false.
int is_contains(string& seq1, string& seq2) {
uint size1 = seq1.size();
......@@ -57,7 +36,8 @@ int is_contains(string& seq1, string& seq2) {
}
//If we find the sequence and structure of pattern A in pattern B, we have to concatenate the pfam lists of A and B,
//remove the duplicates, assign this new list of pfam lists to A, and assign as occurrence to A the size of this list.
void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
std::ifstream lib(jsonfile);
std::ifstream lib2(jsonfile);
......@@ -70,6 +50,7 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
json js = json::parse(lib);
json js2 = json::parse(lib2);
//the list of pfam lists of the motif we want to count the inclusion in other motif
for (auto it = js.begin(); it != js.end(); ++it) {
string id = it.key();
string test;
......@@ -219,7 +200,8 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
//cout << "tab[" << ii << "]: " << tab[ii] << endl;
tab[ii] = 0;
}
//int number_comp[composantes.size()];
//flag is true if the first component is found or if the k component is indeed placed after the k-1 component
//It checks if the found components are in the correct order
for (uint k = 0; k < composantes.size() ; k++) {
bool flag = false;
for (uint l = 0; l < composantes2.size(); l++) {
......@@ -242,14 +224,16 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
}
}
// If number equals the number of components of the motif, the motif is included.
//So we add to the motif the pfam lists of the other motif that it does not already have
if(number == composantes.size()) {
cout << "id: " << id << " / id2: " << id2 << endl;
vector<vector<string>> add_pfams;
std::set_difference(list_pfams2.begin(), list_pfams2.end(), list_pfams.begin(), list_pfams.end(),
std::inserter(add_pfams, add_pfams.begin()));
std::set_union(list_pfams.begin(), list_pfams.end(), add_pfams.begin(), add_pfams.end(),
std::inserter(union_pfams, union_pfams.begin()));
list_pfams = union_pfams;
list_pfams.insert(list_pfams.begin(), add_pfams.begin(), add_pfams.end());
cout << "size: " << list_pfams.size() << endl;
add_pfams.clear();
is_change = true;
}
}
......@@ -257,22 +241,17 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
}
//cout << endl;*/
}
/*for(uint ii = 0; ii < union_pfams.size(); ii++) {
for (uint jj = 0; jj < union_pfams[ii].size(); jj++) {
cout << "[" << ii << "][" << jj << "]: " << union_pfams[ii][jj] << endl;
/*for(uint ii = 0; ii < list_pfams.size(); ii++) {
for (uint jj = 0; jj < list_pfams[ii].size(); jj++) {
cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl;
}
}*/
if (is_change) {
new_id["occurences"] = union_pfams.size();
new_id["pfam"] = union_pfams;
} else {
new_id["occurences"] = occurrences;
new_id["pfam"] = list_pfams;
//}
new_id["occurences"] = list_pfams.size();
new_id["pfam"] = list_pfams;
//cout << "-------ending---------" << endl;
new_motif[id] = new_id;
new_id.clear();
......@@ -296,6 +275,5 @@ int main()
//cout << "------------------END-----------------" << endl;
return 0;
}
}
\ No newline at end of file
......
This diff is collapsed. Click to expand it.
......@@ -336,7 +336,6 @@ bool checkSecondaryStructure(string struc)
vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile)
{
// /!\ returns 0 if no errors
//cout << "---begin----" << endl;
std::ifstream motif;
motif = std::ifstream(jsonfile);
json js = json::parse(motif);
......@@ -345,84 +344,83 @@ vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile)
uint fin = 0;
std::string keys[6] = {"contacts", "occurences", "pdb", "pfam", "sequence", "struct2d"};
// Iterating over Motifs
for (auto i = js.begin(); i != js.end(); ++i) {
int j = 0;
string id = i.key();
string complete_seq;
//cout << id << ": " << endl;
// Iterating over json keys
for (auto it = js[id].begin(); it != js[id].end(); ++it) {
string test = it.key();
//std::cout << "test: " << test << endl;
if (test.compare(keys[j])){
if (test.compare(keys[j]))
{
//std::cout << "error header : keys[" << j << "]: " << keys[j] << " vs test: " << test << endl;
errors_id.push_back(make_pair(stoi(id), 'd'));
//return 'd';
} else if(!test.compare(keys[5])) {
}
else if(!test.compare(keys[5])) // This is the secondary structure field
{
//std::cout << "struct2d: " << it.value() << endl;
string ss = it.value();
if (ss.empty()) {
//std::cout << "error empty" <<endl;
errors_id.push_back(make_pair(stoi(id), 'f'));
//return 'f';
break;
} else if (!checkSecondaryStructure(ss)) {
//std::cout << "error bracket" <<endl;
errors_id.push_back(make_pair(stoi(id), 'n'));
//return 'n';
break;
} else if (ss.size() != complete_seq.size()) {
errors_id.push_back(make_pair(stoi(id), 'x'));
break;
}
} else if (!test.compare(keys[4])) {
}
else if (!test.compare(keys[4])) // This is the sequence field
{
//std::cout << "sequence: " << it.value() << "\n";
string seq = it.value();
complete_seq = seq;
if (seq.empty()) {
//std::cout << "error empty 2" <<endl;
errors_id.push_back(make_pair(stoi(id), 'e'));
//return 'l';
} else if (seq.size() == 1) {
//std::cout << "error too short" << endl;
break;
}
if (seq.size() < 4) {
errors_id.push_back(make_pair(stoi(id), 'l'));
} else {
break;
}
// Iterate on components to check their length
string subseq;
if (seq.size() > 3) {
while((seq.find('&') != string::npos)) {
fin = seq.find('&');
subseq = seq.substr(0, fin);
seq = seq.substr(fin + 1);
if (subseq.size() >= 2) {
components.push_back(subseq);
//std::cout << "subseq: " << subseq << endl;
} else {
errors_id.push_back(make_pair(stoi(id), 'k'));
//std::cout << "error too short1" << endl;
}
}
if (seq.size() >= 2) {
components.push_back(seq);
//std::cout << "subseq: " << seq << endl;
} else {
errors_id.push_back(make_pair(stoi(id), 'k'));
//std::cout << "error too short2" << endl;
}
size_t n = 0;
for (uint ii = 0; ii < components.size(); ii++) {
n += components[ii].size();
}
if(n <= 3) {
errors_id.push_back(make_pair(stoi(id), 'k'));
}
while((seq.find('&') != string::npos)) {
fin = seq.find('&');
subseq = seq.substr(0, fin);
seq = seq.substr(fin + 1);
if (subseq.size() >= 2) {
components.push_back(subseq);
} else {
errors_id.push_back(make_pair(stoi(id), 'k'));
}
}
if (seq.size() >= 2) { // Last component after the last &
components.push_back(seq);
} else {
errors_id.push_back(make_pair(stoi(id), 'k'));
}
size_t n = 0;
for (uint ii = 0; ii < components.size(); ii++) {
n += components[ii].size();
}
if(n <= 3) {
errors_id.push_back(make_pair(stoi(id), 'l'));
}
}
j++;
//cout << "test fin" << endl << endl;
}
//std::cout << "no error!\n" << endl;
}
return errors_id;
//cout << "---end----" << endl;
}
bool is_desc_insertible(const string& descfile, const string& rna)
......
This diff is collapsed. Click to expand it.
#include <iostream>
#include <sstream>
#include <fstream>
#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
#include <typeinfo>
#include <set>
#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;
using json = nlohmann::json;
void delete_redundant_pdb(const string& jsonfile, const string& jsontest, const string& jsonoutfile) {
std::ifstream lib(jsonfile);
std::ifstream lib2(jsontest);
std::ofstream outfile (jsonoutfile);
json new_motif;
json new_id;
json js = json::parse(lib);
json js2 = json::parse(lib2);
//the list of pfam lists of the motif we want to count the inclusion in other motif
for (auto it = js.begin(); it != js.end(); ++it) {
string id = it.key();
vector<string> list_pdbs;
vector<string> list_pdbs2;
bool is_added = true;
//cout << "id: " << id << endl;
for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
string test = it2.key();
if (!test.compare("pdb")) {
vector<string> tab = it2.value();
list_pdbs = tab;
/*set<set<string>>::iterator iit;
set<string>::iterator iit2;
for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) {
for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) {
cout << *iit2 << endl;
}
cout << endl << endl;
}*/
} else {
new_id[test] = it2.value();
}
}
//cout << "-------begin---------" << endl;
for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) {
string id2 = it3.key();
//cout << "id: " << id << " / id2: " << id2 << endl;
for (auto it4 = js[id2].begin(); it4 != js[id2].end(); ++it4) {
string test = it4.key();
if (!test.compare("pdb")) {
vector<string> tab = it4.value();
list_pdbs2 = tab;
//cout << id << " / " << id2 << endl;
for (uint k = 0; k < list_pdbs2.size(); k++) {
if (count(list_pdbs.begin(), list_pdbs.end(), list_pdbs2[k])) {
is_added = false;
}
//cout << list_pdbs2[k] << endl;
}
}
}
//cout << endl;*/
}
/*for(uint ii = 0; ii < list_pfams.size(); ii++) {
for (uint jj = 0; jj < list_pfams[ii].size(); jj++) {
cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl;
}
}*/
if (is_added) {
new_id["pdb"] = list_pdbs;
new_motif[id] = new_id;
}
new_id.clear();
//cout << "valeur: " << ite << endl;
/*for (uint i = 0; i < tab_struc.size() ; i++) {
cout << "tab_struc[" << i << "]: " << tab_struc[i] << endl << endl;
} */
}
outfile << new_motif.dump(4) << endl;
outfile.close();
}
// Entry point: filters the motif library against the benchmark file and writes the result.
// Optional command-line arguments override the built-in default paths:
//   argv[1] : motif library JSON, argv[2] : benchmark JSON, argv[3] : output JSON.
// Run without arguments, the program uses the three default paths below.
int main(int argc, char* argv[])
{
    std::string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/bibli_test2.json";
    std::string jsontest = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark_test.json";
    std::string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final_test.json";

    if (argc > 1) {
        jsonfile = argv[1];
    }
    if (argc > 2) {
        jsontest = argv[2];
    }
    if (argc > 3) {
        out = argv[3];
    }

    delete_redundant_pdb(jsonfile, jsontest, out);
    return 0;
}
> JSON1000_extended
AAUAUCCGGGCGUUUAAUCCCGGGAUAAA
\ No newline at end of file