Nathalie BERNARD

Nettoyage des fichiers avec des commentaires en plus

Showing 62 changed files with 161 additions and 1757 deletions
This diff is collapsed. Click to expand it.
...@@ -65,9 +65,9 @@ Check the file [INSTALL.md](INSTALL.md) for installation instructions. ...@@ -65,9 +65,9 @@ Check the file [INSTALL.md](INSTALL.md) for installation instructions.
65 ``` 65 ```
66 Usage: You must provide: 66 Usage: You must provide:
67 1) a FASTA input file with -i, 67 1) a FASTA input file with -i,
68 - 2) a module type with --rna3dmotifs, --carnaval or --3dmotifatlas 68 + 2) a module type with --rna3dmotifs, --carnaval, --3dmotifatlas or --contacts
69 3) one module placement method in { --patternmatch, --jar3d, --bayespairing } 69 3) one module placement method in { --patternmatch, --jar3d, --bayespairing }
70 - 4) one scoring function with --func A, B, C or D 70 + 4) one scoring function with --func A, B, C, D, E or F
71 71
72 If you are not using the Docker image: 72 If you are not using the Docker image:
73 5) --modules-path, --biorseo-dir and (--jar3d-exec or --bypdir) 73 5) --modules-path, --biorseo-dir and (--jar3d-exec or --bypdir)
...@@ -79,6 +79,7 @@ Options: ...@@ -79,6 +79,7 @@ Options:
79 --rna3dmotifs Use DESC modules from Djelloul & Denise, 2008 79 --rna3dmotifs Use DESC modules from Djelloul & Denise, 2008
80 --carnaval Use RIN modules from Reinharz & al, 2018 80 --carnaval Use RIN modules from Reinharz & al, 2018
81 --3dmotifatlas Use the HL and IL loops from BGSU's 3D Motif Atlas (updated) 81 --3dmotifatlas Use the HL and IL loops from BGSU's 3D Motif Atlas (updated)
82 +--contacts Use the library of motifs, created from RNA sequences linked to proteins provided by I. Chauvot de Beauchene of LORIA laboratory
82 -p [ --patternmatch ] Use regular expressions to place modules in the sequence (requires --rna3dmotifs or --carnaval) 83 -p [ --patternmatch ] Use regular expressions to place modules in the sequence (requires --rna3dmotifs or --carnaval)
83 -j [ --jar3d ] Use JAR3D to place modules in the sequence (requires --3dmotifatlas) 84 -j [ --jar3d ] Use JAR3D to place modules in the sequence (requires --3dmotifatlas)
84 -b [ --bayespairing ] Use BayesPairing2 to place modules in the sequence (requires --rna3dmotifs or --3dmotifatlas) 85 -b [ --bayespairing ] Use BayesPairing2 to place modules in the sequence (requires --rna3dmotifs or --3dmotifatlas)
...@@ -123,5 +124,6 @@ The allowed module/placement-method/function combinations are: ...@@ -123,5 +124,6 @@ The allowed module/placement-method/function combinations are:
123 --rna3dmotifs A. B. A. B. C. D. 124 --rna3dmotifs A. B. A. B. C. D.
124 --3dmotifatlas A. B. C. D. A. B. C. D. 125 --3dmotifatlas A. B. C. D. A. B. C. D.
125 --carnaval A. B. 126 --carnaval A. B.
127 +--contacts E. F.
126 128
127 ``` 129 ```
......
...@@ -381,7 +381,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -381,7 +381,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
381 break; 381 break;
382 382
383 case 'E': 383 case 'E':
384 - // Fonction f1E 384 + // Fonction f1E
385 for (const Component& c : insertion_sites_[i].comp) sum_k += c.k; 385 for (const Component& c : insertion_sites_[i].comp) sum_k += c.k;
386 obj1 += IloNum(sum_k * insertion_sites_[i].contact_ * insertion_sites_[i].tx_occurrences_) * insertion_dv_[index_of_first_components[i]] ; 386 obj1 += IloNum(sum_k * insertion_sites_[i].contact_ * insertion_sites_[i].tx_occurrences_) * insertion_dv_[index_of_first_components[i]] ;
387 break; 387 break;
...@@ -395,6 +395,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -395,6 +395,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
395 395
396 } 396 }
397 } 397 }
398 + //Stacking energy parameter matrix
398 double energy[7][7] = { 399 double energy[7][7] = {
399 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, 400 {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
400 {0.0, 1.1, 2.1, 2.2, 1.4, 0.9, 0.6}, 401 {0.0, 1.1, 2.1, 2.2, 1.4, 0.9, 0.6},
...@@ -408,7 +409,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -408,7 +409,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
408 obj2 = IloExpr(env_); 409 obj2 = IloExpr(env_);
409 switch (obj_function2_nbr_) { 410 switch (obj_function2_nbr_) {
410 case 'a': 411 case 'a':
411 - // Define the MFE: 412 + // Define the MFE (Minimum Free Energy):
412 for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) { 413 for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) {
413 for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) { 414 for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) {
414 if (get_xij_index(u, v) != rna_.get_RNA_length() * rna_.get_RNA_length() + 1) { 415 if (get_xij_index(u, v) != rna_.get_RNA_length() * rna_.get_RNA_length() + 1) {
...@@ -429,7 +430,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -429,7 +430,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
429 } 430 }
430 break; 431 break;
431 } 432 }
432 - //std::cout << "\n fin \n";
433 } 433 }
434 434
435 MOIP::~MOIP() { env_.end(); } 435 MOIP::~MOIP() { env_.end(); }
...@@ -705,7 +705,6 @@ void MOIP::define_problem_constraints(string& source) ...@@ -705,7 +705,6 @@ void MOIP::define_problem_constraints(string& source)
705 705
706 SecondaryStructure MOIP::solve_objective(int o, double min, double max) 706 SecondaryStructure MOIP::solve_objective(int o, double min, double max)
707 { 707 {
708 - //cout << endl << "BEGIN" << endl;
709 // Solves one of the objectives, under constraint that the other should be in [min, max] 708 // Solves one of the objectives, under constraint that the other should be in [min, max]
710 709
711 if (min > max) { 710 if (min > max) {
...@@ -755,17 +754,11 @@ SecondaryStructure MOIP::solve_objective(int o, double min, double max) ...@@ -755,17 +754,11 @@ SecondaryStructure MOIP::solve_objective(int o, double min, double max)
755 } 754 }
756 755
757 // if (verbose_) cout << "\t\t>retrieving basepairs of the result secondary structure..." << endl; 756 // if (verbose_) cout << "\t\t>retrieving basepairs of the result secondary structure..." << endl;
758 - //cout << "y(2,80): " << cplex_.getValue(y(u, v)) << endl;
759 for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) 757 for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++)
760 for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) 758 for (size_t v = u + 4; v < rna_.get_RNA_length(); v++)
761 if (allowed_basepair(u, v)) 759 if (allowed_basepair(u, v))
762 if (cplex_.getValue(y(u, v)) > 0.5) { 760 if (cplex_.getValue(y(u, v)) > 0.5) {
763 best_ss.set_basepair(u, v); 761 best_ss.set_basepair(u, v);
764 - /*if (u == 5 && v == 26) {
765 - cout << endl << "(" << u << "," << v << "): " << endl;
766 - cout << best_ss.to_string() << endl;
767 - cout << "(((...((((((((....))))))))(((.....((((((((....)))))))))))...((((((((....)))))))))))" << endl;
768 - }*/
769 } 762 }
770 763
771 best_ss.sort(); // order the basepairs in the vector 764 best_ss.sort(); // order the basepairs in the vector
...@@ -1159,7 +1152,6 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct) ...@@ -1159,7 +1152,6 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct)
1159 } 1152 }
1160 } 1153 }
1161 1154
1162 -//Temporaire--------------------------------------
1163 1155
1164 //Check if the sequence is a rna sequence (ATGC) and replace T by U or remove modified nucleotide if necessary 1156 //Check if the sequence is a rna sequence (ATGC) and replace T by U or remove modified nucleotide if necessary
1165 string check_motif_sequence(string seq) { 1157 string check_motif_sequence(string seq) {
...@@ -1184,9 +1176,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { ...@@ -1184,9 +1176,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) {
1184 stack<uint> accolades; 1176 stack<uint> accolades;
1185 stack<uint> chevrons; 1177 stack<uint> chevrons;
1186 1178
1187 - /*for(uint j = 0; j < v.size(); j++) {
1188 - cout << "composante: (" << v[j].pos.first << "," << v[j].pos.second << ")" << endl << endl;
1189 - }*/
1190 uint count = 0; 1179 uint count = 0;
1191 uint debut = v[count].pos.first; 1180 uint debut = v[count].pos.first;
1192 uint gap = 0; 1181 uint gap = 0;
...@@ -1194,12 +1183,10 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { ...@@ -1194,12 +1183,10 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) {
1194 for (uint i = 0; i < struc.size(); i++) { 1183 for (uint i = 0; i < struc.size(); i++) {
1195 if (struc[i] == '(') { 1184 if (struc[i] == '(') {
1196 parentheses.push(i + debut + gap - count); 1185 parentheses.push(i + debut + gap - count);
1197 - //cout << "i: " << i << " pos :" << parentheses.top() << endl;
1198 1186
1199 } else if (struc[i] == ')') { 1187 } else if (struc[i] == ')') {
1200 Link l; 1188 Link l;
1201 l.nts.first = parentheses.top(); 1189 l.nts.first = parentheses.top();
1202 - //cout << "top :" << parentheses.top() << endl;
1203 l.nts.second = i + debut + gap - count; 1190 l.nts.second = i + debut + gap - count;
1204 vec.push_back(l); 1191 vec.push_back(l);
1205 parentheses.pop(); 1192 parentheses.pop();
...@@ -1237,8 +1224,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { ...@@ -1237,8 +1224,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) {
1237 } else if (struc[i] == '&') { 1224 } else if (struc[i] == '&') {
1238 count ++; 1225 count ++;
1239 gap += v[count].pos.first - v[count - 1].pos.second - 1; 1226 gap += v[count].pos.first - v[count - 1].pos.second - 1;
1240 - //cout << "count: " << count << endl;
1241 - //cout << "gap : " << gap << endl;
1242 } 1227 }
1243 } 1228 }
1244 return vec; 1229 return vec;
...@@ -1311,11 +1296,9 @@ vector<string> find_components(string sequence, string delimiter) { ...@@ -1311,11 +1296,9 @@ vector<string> find_components(string sequence, string delimiter) {
1311 subseq = seq.substr(0, fin); 1296 subseq = seq.substr(0, fin);
1312 seq = seq.substr(fin + 1); 1297 seq = seq.substr(fin + 1);
1313 list.push_back(subseq); // new component sequence 1298 list.push_back(subseq); // new component sequence
1314 - //std::cout << "subseq: " << subseq << endl;
1315 } 1299 }
1316 if (!seq.empty()) { 1300 if (!seq.empty()) {
1317 list.push_back(seq); 1301 list.push_back(seq);
1318 - //std::cout << "subseq: " << seq << endl;
1319 } 1302 }
1320 return list; 1303 return list;
1321 } 1304 }
...@@ -1324,15 +1307,11 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) { ...@@ -1324,15 +1307,11 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) {
1324 vector<uint> positions; 1307 vector<uint> positions;
1325 string delimiter = "*"; 1308 string delimiter = "*";
1326 uint debut; 1309 uint debut;
1327 - /*cout << "vsize: " << v.size() << endl;
1328 - cout << "struc2dsize: " << struc2d.size() << endl;*/
1329 for (uint i = 0; i < v.size(); i++) { 1310 for (uint i = 0; i < v.size(); i++) {
1330 - //cout << "[" << i << "]:" << endl;
1331 debut = v[i].pos.first; 1311 debut = v[i].pos.first;
1332 uint pos = struc2d[i].find(delimiter, 0); 1312 uint pos = struc2d[i].find(delimiter, 0);
1333 while(pos != string::npos && pos <= struc2d[i].size()) 1313 while(pos != string::npos && pos <= struc2d[i].size())
1334 { 1314 {
1335 - //cout << "position: " << pos + debut << endl;
1336 positions.push_back(pos + debut); 1315 positions.push_back(pos + debut);
1337 pos = struc2d[i].find(delimiter, pos+1); 1316 pos = struc2d[i].find(delimiter, pos+1);
1338 } 1317 }
...@@ -1340,8 +1319,6 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) { ...@@ -1340,8 +1319,6 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) {
1340 return positions; 1319 return positions;
1341 } 1320 }
1342 1321
1343 -//Temporaire--------------------------------------
1344 -
1345 void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pair<uint, char>> errors_id) 1322 void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pair<uint, char>> errors_id)
1346 { 1323 {
1347 /* 1324 /*
...@@ -1373,8 +1350,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai ...@@ -1373,8 +1350,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
1373 string keys[5] = {"contacts", "occurences", "pdb", "sequence", "struct2d"}; 1350 string keys[5] = {"contacts", "occurences", "pdb", "sequence", "struct2d"};
1374 uint it_errors = 0; 1351 uint it_errors = 0;
1375 uint comp; 1352 uint comp;
1376 - //uint max_occ = 0;
1377 - //uint max_n = 0;
1378 uint occ = 0; 1353 uint occ = 0;
1379 1354
1380 for(auto it = js.begin(); it != js.end(); ++it) { 1355 for(auto it = js.begin(); it != js.end(); ++it) {
...@@ -1385,10 +1360,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai ...@@ -1385,10 +1360,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
1385 // Check for known errors to ignore corresponding motifs 1360 // Check for known errors to ignore corresponding motifs
1386 if (comp == errors_id[it_errors].first) { 1361 if (comp == errors_id[it_errors].first) {
1387 while (comp == errors_id[it_errors].first) { 1362 while (comp == errors_id[it_errors].first) {
1388 - //cout << "id erreur: " << errors_id[it_errors].first << " " << errors_id[it_errors].second << endl;
1389 - /*if (contacts_id.compare("974") == 0) {
1390 - cout << "id erreur: " << errors_id[it_errors].second << endl;
1391 - }*/
1392 it_errors ++; 1363 it_errors ++;
1393 } 1364 }
1394 continue; 1365 continue;
...@@ -1396,7 +1367,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai ...@@ -1396,7 +1367,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
1396 1367
1397 for(auto it2 = js[contacts_id].begin(); it2 != js[contacts_id].end(); ++it2) { 1368 for(auto it2 = js[contacts_id].begin(); it2 != js[contacts_id].end(); ++it2) {
1398 field = it2.key(); 1369 field = it2.key();
1399 - //cout << "field: " << field << endl;
1400 if (!field.compare(keys[0])) // This is the contacts field 1370 if (!field.compare(keys[0])) // This is the contacts field
1401 { 1371 {
1402 contacts = it2.value(); 1372 contacts = it2.value();
...@@ -1406,25 +1376,17 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai ...@@ -1406,25 +1376,17 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
1406 else if (!field.compare(keys[1])) // This is the occurences field 1376 else if (!field.compare(keys[1])) // This is the occurences field
1407 { 1377 {
1408 occ = it2.value(); 1378 occ = it2.value();
1409 - //max_occ = find_max_occurrences(filepath);
1410 tx_occurrences = (double)occ; // / (double)max_occ; 1379 tx_occurrences = (double)occ; // / (double)max_occ;
1411 - //cout << "occ: " << tx_occurrences << endl;
1412 1380
1413 } 1381 }
1414 else if (!field.compare(keys[2])) // This is the pdb field 1382 else if (!field.compare(keys[2])) // This is the pdb field
1415 { 1383 {
1416 vector<string> tab = it2.value(); 1384 vector<string> tab = it2.value();
1417 pdbs = tab; 1385 pdbs = tab;
1418 - /*for (uint i = 0; i < pdbs.size(); i++) {
1419 - cout << "pdbs[" << i << "]: " << pdbs[i] << endl;
1420 - }*/
1421 -
1422 } 1386 }
1423 else if (!field.compare(keys[3])) // This is the sequence field 1387 else if (!field.compare(keys[3])) // This is the sequence field
1424 { 1388 {
1425 seq = check_motif_sequence(it2.value()); 1389 seq = check_motif_sequence(it2.value());
1426 - /*max_n = find_max_sequence(filepath);
1427 - tx_occurrences = (double)occ / (double)max_n - seq.size() + 1 ;*/
1428 component_sequences = find_components(seq, "&"); 1390 component_sequences = find_components(seq, "&");
1429 } 1391 }
1430 else if (!field.compare(keys[4])) // This is the struct2D field 1392 else if (!field.compare(keys[4])) // This is the struct2D field
...@@ -1440,9 +1402,7 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai ...@@ -1440,9 +1402,7 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
1440 1402
1441 Motif temp_motif = Motif(v, contacts_id, nb_contacts, tx_occurrences); 1403 Motif temp_motif = Motif(v, contacts_id, nb_contacts, tx_occurrences);
1442 temp_motif.links_ = search_pairing(struct2d, v); 1404 temp_motif.links_ = search_pairing(struct2d, v);
1443 - //cout << "test" << endl;
1444 temp_motif.pos_contacts = find_contacts(component_contacts, v); 1405 temp_motif.pos_contacts = find_contacts(component_contacts, v);
1445 - //cout << "test2" << endl;
1446 1406
1447 // Check if the motif can be inserted, checking the basepairs probabilities and theta 1407 // Check if the motif can be inserted, checking the basepairs probabilities and theta
1448 bool unprobable = false; 1408 bool unprobable = false;
......
...@@ -275,8 +275,7 @@ char Motif::is_valid_RIN(const string& rinfile) ...@@ -275,8 +275,7 @@ char Motif::is_valid_RIN(const string& rinfile)
275 return (char) 0; 275 return (char) 0;
276 } 276 }
277 277
278 -//temporaire--------------------------------------------------- 278 +//check that there are as many opening parentheses as closing ones
279 -
280 bool checkSecondaryStructure(string struc) 279 bool checkSecondaryStructure(string struc)
281 { 280 {
282 stack<uint> parentheses; 281 stack<uint> parentheses;
...@@ -332,6 +331,7 @@ bool checkSecondaryStructure(string struc) ...@@ -332,6 +331,7 @@ bool checkSecondaryStructure(string struc)
332 return (parentheses.empty() && crochets.empty() && accolades.empty() && chevrons.empty()); 331 return (parentheses.empty() && crochets.empty() && accolades.empty() && chevrons.empty());
333 } 332 }
334 333
334 +//count the number of nucleotides in the motif sequence
335 size_t count_nucleotide(string& seq) { 335 size_t count_nucleotide(string& seq) {
336 size_t count = 0; 336 size_t count = 0;
337 for(uint i = 0; i < seq.size(); i++) { 337 for(uint i = 0; i < seq.size(); i++) {
...@@ -343,6 +343,7 @@ size_t count_nucleotide(string& seq) { ...@@ -343,6 +343,7 @@ size_t count_nucleotide(string& seq) {
343 return count; 343 return count;
344 } 344 }
345 345
346 +//count the number of '&' in the motif sequence
346 size_t count_delimiter(string& seq) { 347 size_t count_delimiter(string& seq) {
347 size_t count = 0; 348 size_t count = 0;
348 for(uint i = 0; i < seq.size(); i++) { 349 for(uint i = 0; i < seq.size(); i++) {
...@@ -354,7 +355,6 @@ size_t count_delimiter(string& seq) { ...@@ -354,7 +355,6 @@ size_t count_delimiter(string& seq) {
354 return count; 355 return count;
355 } 356 }
356 357
357 -//--------------------------------------------------------------
358 vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile) 358 vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile)
359 { 359 {
360 // /!\ returns 0 if no errors 360 // /!\ returns 0 if no errors
...@@ -458,7 +458,6 @@ vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile) ...@@ -458,7 +458,6 @@ vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile)
458 } 458 }
459 j++; 459 j++;
460 } 460 }
461 - //std::cout << "no error!\n" << endl;
462 } 461 }
463 return errors_id; 462 return errors_id;
464 } 463 }
...@@ -524,17 +523,9 @@ vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<stri ...@@ -524,17 +523,9 @@ vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<stri
524 if (regex_search(rna, c)) { 523 if (regex_search(rna, c)) {
525 if (vc.size() > 2) { 524 if (vc.size() > 2) {
526 next_seqs = vector<string>(&vc[1], &vc[vc.size()]); 525 next_seqs = vector<string>(&vc[1], &vc[vc.size()]);
527 - /*for (uint i = 0; i < next_seqs.size(); i++) {
528 - std::cout << "next seq: " << next_seqs[i] << endl;
529 - }
530 - std::cout << endl;*/
531 } 526 }
532 else { 527 else {
533 next_seqs = vector<string>(1, vc.back()); 528 next_seqs = vector<string>(1, vc.back());
534 - /*for (uint i = 0; i < next_seqs.size(); i++) {
535 - std::cout << "next seq: " << next_seqs[i] << endl;
536 - }
537 - std::cout << endl;*/
538 } 529 }
539 uint j = 0; 530 uint j = 0;
540 // For every regexp match 531 // For every regexp match
...@@ -606,17 +597,9 @@ vector<vector<Component>> json_find_next_ones_in(string rna, uint offset, vector ...@@ -606,17 +597,9 @@ vector<vector<Component>> json_find_next_ones_in(string rna, uint offset, vector
606 if (regex_search(rna, c)) { 597 if (regex_search(rna, c)) {
607 if (vc.size() > 2) { 598 if (vc.size() > 2) {
608 next_seqs = vector<string>(&vc[1], &vc[vc.size()]); 599 next_seqs = vector<string>(&vc[1], &vc[vc.size()]);
609 - /*for (uint i = 0; i < next_seqs.size(); i++) {
610 - std::cout << "next seq: " << next_seqs[i] << endl;
611 - }
612 - std::cout << endl;*/
613 } 600 }
614 else { 601 else {
615 next_seqs = vector<string>(1, vc.back()); 602 next_seqs = vector<string>(1, vc.back());
616 - /*for (uint i = 0; i < next_seqs.size(); i++) {
617 - std::cout << "next seq: " << next_seqs[i] << endl;
618 - }
619 - std::cout << endl;*/
620 } 603 }
621 uint j = 0; 604 uint j = 0;
622 // For every regexp match 605 // For every regexp match
......
1 -#include <iostream>
2 -#include <sstream>
3 -#include <fstream>
4 -#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
5 -#include <typeinfo>
6 -#include <set>
7 -#include <algorithm>
8 -#include <cstdio>
9 -#include <vector>
10 -
11 -using namespace std;
12 -using json = nlohmann::json;
13 -
14 -//Concatenate the motives from jsonmotifs by adding the corresponding pdb from jsondssr
15 -void add_pdb(const string& jsonmotifs, const string& jsondssr, const string& jsonoutfile) {
16 - std::ifstream lib(jsonmotifs);
17 - std::ifstream lib2(jsondssr);
18 -
19 - std::ofstream outfile (jsonoutfile);
20 - json new_motif;
21 - json new_id;
22 - json js = json::parse(lib);
23 - json js2 = json::parse(lib2);
24 -
25 - for (auto it = js.begin(); it != js.end(); ++it) {
26 - string id = it.key();
27 -
28 - string sequence, structure;
29 - vector<string> list_pdbs;
30 - vector<string> list_pdbs2;
31 - bool is_added = true;
32 -
33 - //cout << "id: " << id << endl;
34 - for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
35 - string test = it2.key();
36 -
37 - if (!test.compare("sequence")) {
38 - sequence = it2.value();
39 - new_id[test] = it2.value();
40 -
41 - } else if (!test.compare("struct2d")) {
42 - structure = it2.value();
43 - new_id[test] = it2.value();
44 -
45 - } else {
46 - new_id[test] = it2.value();
47 - }
48 - }
49 - //cout << "-------begin---------" << endl;
50 -
51 - for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) {
52 - string id2 = it3.key();
53 - string sequence2, structure2;
54 -
55 - //cout << "id: " << id << " / id2: " << id2 << endl;
56 - for (auto it4 = js2[id2].begin(); it4 != js2[id2].end(); ++it4) {
57 - string chain = it4.key();
58 -
59 - for (auto it5 = js2[id2][chain].begin(); it5 != js2[id2][chain].end(); ++it5) {
60 - string test = it5.key();
61 -
62 - if (!test.compare("sequence")) {
63 - sequence2 = it5.value();
64 - //cout << sequence2 << endl;
65 - if (!sequence.compare(sequence2) && !structure.compare(structure2)) {
66 - //cout << id2 << endl;
67 - vector<string> tmp;
68 - tmp.push_back(id2);
69 - new_id["pdb"] = tmp;
70 - }
71 -
72 - } else if (!test.compare("2D ")) {
73 - structure2 = it5.value();
74 - //cout << structure2 << endl;
75 - }
76 - }
77 - }
78 - //cout << endl;*/
79 - }
80 -
81 -
82 - /*for(uint ii = 0; ii < list_pfams.size(); ii++) {
83 - for (uint jj = 0; jj < list_pfams[ii].size(); jj++) {
84 - cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl;
85 - }
86 - }*/
87 - new_motif[id] = new_id;
88 - new_id.clear();
89 - //cout << "valeur: " << ite << endl;
90 - /*for (uint i = 0; i < tab_struc.size() ; i++) {
91 - cout << "tab_struc[" << i << "]: " << tab_struc[i] << endl << endl;
92 - } */
93 - }
94 - outfile << new_motif.dump(4) << endl;
95 - outfile.close();
96 -}
97 -
98 -int main()
99 -{
100 - string jsonmotifs = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_beta.json";
101 - string jsondssr = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/dssr2.json";
102 - string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_fusion_beta.json";
103 - add_pdb(jsonmotifs, jsondssr, out);
104 - return 0;
105 -}
106 -
No preview for this file type
No preview for this file type
No preview for this file type
This diff is collapsed. Click to expand it.
No preview for this file type
1 -#include <iostream>
2 -#include <sstream>
3 -#include <fstream>
4 -#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
5 -#include <typeinfo>
6 -#include <set>
7 -#include <algorithm>
8 -#include <cstdio>
9 -#include <vector>
10 -
11 -using namespace std;
12 -using json = nlohmann::json;
13 -/*
14 -vector<string> find_components(string sequence, string delimiter) {
15 - vector<string> list;
16 - string seq = sequence;
17 - string subseq;
18 - uint fin = 0;
19 -
20 - while(seq.find(delimiter) != string::npos) {
21 - fin = seq.find(delimiter);
22 -
23 - subseq = seq.substr(0, fin);
24 - seq = seq.substr(fin + 1);
25 - list.push_back(subseq); // new component sequence
26 - //std::cout << "subseq: " << subseq << endl;
27 - }
28 - if (!seq.empty()) {
29 - list.push_back(seq);
30 - //std::cout << "subseq: " << seq << endl;
31 - }
32 - return list;
33 -}
34 -
35 -string is_include(vector<string>& components, string sequence, vector<string>& contacts) {
36 -
37 - string seq_contact = "";
38 - vector<uint> positions;
39 - uint count = 0;
40 - uint debut = 0;
41 - string str = components[0];
42 -
43 - uint pos = sequence.find(str, 0);
44 - debut = pos + components[0].size();
45 -
46 - if (pos == 0) {
47 - seq_contact += contacts[0];
48 - } else if (pos <= sequence.size()) {
49 - string gap = "";
50 - for (uint i = 0; i < pos; i++) {
51 - gap += ".";
52 - }
53 - seq_contact += gap + contacts[0];
54 - }
55 - while(pos <= sequence.size() && count < components.size() - 1)
56 - {
57 - string gap = "";
58 - debut = pos + components[count].size();
59 - count++;
60 - str = components[count];
61 - pos = sequence.find(str, pos + components[count-1].size());
62 -
63 - for (uint i = debut; i < pos; i++) {
64 - gap += ".";
65 - }
66 - seq_contact += gap + contacts[count];
67 -
68 - }
69 - if (count == components.size() - 1) {
70 - string gap = "";
71 - if (seq_contact.size() != sequence.size()) {
72 - for (uint i = 0; i < sequence.size() - seq_contact.size(); i++) {
73 - gap += ".";
74 - }
75 - }
76 - seq_contact += gap;
77 - return seq_contact;
78 - }
79 - return std::string();
80 -}*/
81 -/*
82 -//Concatenate the contact field to the motives of the benchmark (which is obtained from the motives library)
83 -string add_contact(const string& jsonbm, const string& jsonmotifs) {
84 - std::ifstream lib(jsonbm);
85 - std::ifstream lib2(jsonmotifs);
86 - string bm2 = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.json";
87 - std::ofstream outfile (bm2);
88 - json new_motif;
89 - json new_id;
90 - json js = json::parse(lib);
91 - json js2 = json::parse(lib2);
92 -
93 - for (auto it = js.begin(); it != js.end(); ++it) {
94 - string id = it.key();
95 - string seq_bm;
96 - string seq_contact;
97 -
98 - for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
99 - string test = it2.key();
100 - //cout << "test: " << it2.key() << endl;
101 - if (!test.compare("seq")) {
102 - seq_bm = it2.value();
103 - new_id[test] = it2.value();
104 - } else {
105 - new_id[test] = it2.value();
106 - }
107 - }
108 - //cout << "-------begin---------" << endl;
109 -
110 - for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) {
111 - string id2 = it3.key();
112 - vector<string> comp;
113 - vector<string> strucs;
114 - vector<string> list_pdbs;
115 - bool flag = false;
116 -
117 - //cout << "id: " << id << " / id2: " << id2 << endl;
118 - for (auto it4 = js2[id2].begin(); it4 != js2[id2].end(); ++it4) {
119 - string test = it4.key();
120 -
121 - if (!test.compare("sequence")) {
122 - string sequence = it4.value();
123 - comp = find_components(sequence, "&");
124 - //cout << id << " / " << id2 << endl;
125 - } else if (!test.compare("contacts")) {
126 - string struc2d = it4.value();
127 - strucs = find_components(struc2d, "&");
128 -
129 - } else if (!test.compare("pdb")) {
130 - vector<string> tab = it4.value();
131 - list_pdbs = tab;
132 - if (find(list_pdbs.begin(), list_pdbs.end(), id) != list_pdbs.end()) {
133 - flag = true;
134 - }
135 - }
136 - }
137 - if (flag) {
138 - seq_contact = is_include(comp, seq_bm, strucs);
139 - //cout << "id: " << id << " id2: " << id2 << " seq_contact: " << seq_contact << endl;
140 - new_id["ctc"] = seq_contact;
141 - }
142 -
143 - }
144 -
145 - new_motif[id] = new_id;
146 - new_id.clear();
147 -
148 - }
149 - outfile << new_motif.dump(4) << endl;
150 - outfile.close();
151 - return bm2;
152 -}*/
153 -
154 -void create_benchmark(const string& jsonmotifs) {
155 - std::ifstream lib(jsonmotifs);
156 - string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/";
157 - string list = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.txt";
158 - string dbn = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
159 - std::ofstream outlist (list);
160 - std::ofstream outdbn (dbn);
161 - json js = json::parse(lib);
162 - uint count = 0;
163 -
164 - for (auto it = js.begin(); it != js.end(); ++it) {
165 - string id = it.key();
166 - string name, seq, contacts, structure;
167 - for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
168 - string chain = it2.key();
169 - if (chain.compare("pfams") != 0) {
170 - string name = id + "_" + chain;
171 - string filename = fasta + name + ".fa";
172 - std::ofstream outfasta (filename);
173 - outfasta << ">test_" << name << endl;
174 - for (auto it3 = js[id][chain].begin(); it3 != js[id][chain].end(); ++it3) {
175 - string field = it3.key();
176 - if (!field.compare("sequence")) {
177 - seq = it3.value();
178 - outfasta << seq.substr(0,seq.size()) << endl;
179 - outfasta.close();
180 -
181 - } else if (!field.compare("contacts")) {
182 - contacts = it3.value();
183 -
184 - } else if (!field.compare("struct2d")) {
185 - structure = it3.value();
186 - }
187 - }
188 - if(seq.find('&') == string::npos) {
189 - outlist << ">test_" << name << endl;
190 - outdbn << "test_" << name << "." << endl;
191 - outlist << contacts << endl;
192 - outdbn << seq << endl;
193 - outdbn << structure << endl;
194 - outdbn << contacts << endl;
195 - outlist << seq << endl;
196 - outlist << structure << endl;
197 - count++;
198 - }
199 - }
200 - }
201 - }
202 - cout << count << " sequences en tout" << endl;
203 - lib.close();
204 - outlist.close();
205 - outdbn.close();
206 -}
207 -
208 -int main()
209 -{
210 - string path = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/";
211 - //string jsonmotifs = path + "modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json";
212 - string jsonbm = path + "modules/ISAURE/Motifs_version_initiale/benchmark_16-07-2021.json";
213 -
214 -
215 - //string jsonbm2 = add_contact(jsonbm1, jsonmotifs);
216 - create_benchmark(jsonbm);
217 -
218 - return 0;
219 -}
220 -
No preview for this file type
No preview for this file type
1 ->test_1JJ2
2 -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCGGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAACCCGGUUCGCCGCCACC
3 ->test_1L9A
4 -GACACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAUUUGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGUC
5 ->test_1LNG
6 -UCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUC
7 ->test_1MFQ
8 -GACACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGUC
9 ->test_1SM1
10 -CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG
11 ->test_1U6P
12 -GGCGGUACUAGUUGAGAAACUAGCUCUGUAUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
13 ->test_1Y69
14 -CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG
15 ->test_1YHQ
16 -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC
17 ->test_1YI2
18 -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC
19 ->test_2V3C
20 -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
21 ->test_2ZJQ
22 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
23 ->test_2ZJR
24 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
25 ->test_3ADB
26 -GGCCGCCGCCACCGGGGUGGUCCCCGGGCCGGACUUCAGAUCCGGCGCGCCCCGAGUGGGGCGCGGGGUUCAAUUCCCCGCGGCGGCCGCCA
27 ->test_3CUL
28 -GGAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA
29 ->test_3CUN
30 -GAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA
31 ->test_3DLL
32 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
33 ->test_3HHN
34 -UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUACCCAUUGCACUCCGGGUAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA
35 ->test_3IVKA
36 -UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA
37 ->test_3IWN
38 -CACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACGGCAUUGCACUCCGCCGUAGGUAGCGGGGUUACCGAUGG
39 ->test_3KTW
40 -AGAUAGUCGUGGGUUCCCUUUCUGGAGGGAGAGGGAAUUCCACGUUGACCGGGGGAACCGGCCAGGCCCGGAAGGGAGCAACCGUGCCCGGCUAU
41 ->test_3MUM
42 -GUCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG
43 ->test_3MUR
44 -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG
45 ->test_3NDB
46 -GUCUCGUCCCGUGGGGCUCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCGGCGCUCACGGGGGUGCGGGAC
47 ->test_3PIO
48 -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
49 ->test_3PIP
50 -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
51 ->test_3UCU
52 -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG
53 ->test_3UD4
54 -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG
55 ->test_3V7E
56 -GGCUUAUCAAGAGAGGUGGAGGGACUGGCCCGAUGAAACCCGGCAACCACUAGUCUAGCGUCAGCUUCGGCUGACGCUAGGCUAGUGGUGCCAAUUCCUGCAGCGGAAACGUUGAAAGAUGAGCCA
57 ->test_3W3S
58 -GGGAGAGGUUGGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUCCCUCUCCCGCC
59 ->test_4IO9
60 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
61 ->test_4IOA
62 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
63 ->test_4LCK
64 -GGGUGCGAUGAGAAGAAGAGUAUUAAGGAUUUACUAUGAUUAGCGACUCUAGGAUAGUGAAAGCUAGAGGAUAGUAACCUUAAGAAGGCACUUCGAGCACCC
65 ->test_4P3EA
66 -GACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUU
67 ->test_4P3EB
68 -GACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUU
69 ->test_4UYJ
70 -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGUAAGGGUUCCCACCCUCGGGCGUGCCUC
71 ->test_4UYK
72 -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGAAGCCGCCUCUGUAAGGAGGCGGUGGAGGGUUCCCACCCUCGGGCGUGCCUC
73 ->test_4W90
74 -GCGCGCUUAAUCUGAAAUCAGAGCGGGGGACCCAUUGCACUCCGGGUUUUUCCCGUAAGGGGUGAAUCCUUUUUAGGUAGGGCGAAAGCCCGAAUCCGUCAGCUAACCUCGUAAGCGCGC
75 ->test_4WF9
76 -UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC
77 ->test_4XCO
78 -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
79 ->test_4YB1
80 -GGGCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACCUCGGUAGGUAGCGGGGUUACCGAUG
81 ->test_5DM7
82 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
83 ->test_5JVGA
84 -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
85 ->test_5M73
86 -GGUGUCCGCACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGGGAUCGCGCCUA
87 ->test_5NRGA
88 -UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC
This diff is collapsed. Click to expand it.
1 -> JSON1000_extended
2 -AAUAUCCGGGCGUUUAAUCCCGGGAUAAA
...\ No newline at end of file ...\ No newline at end of file
1 ->test_3DLL
2 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
...\ No newline at end of file ...\ No newline at end of file
1 ->test_1003_1005_110
2 -CCGGGACCUCUAACCGGGUUCCCGGGCAGUCACUG
...\ No newline at end of file ...\ No newline at end of file
1 ->test_927
2 -CCCGUGCCCAUAGAGGACCACCCCAUCCAUGCCGAGCUGGGUUUGUGAAACACGCCAAUGAUCCGGACCGCAGGGUCCCAAGUCGGUCAGCGCGGG
3 ->test_170
4 -GCGGGAGUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCCUGUUCGAAUCAGCCCCCGCACCA
5 ->test_768
6 -CUAGUCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGUUCGAAGCCGAGGUGUUCUUCCCCCUCGCCGUG
7 ->test_770
8 -CUAGACGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGCCGAAGUUUCGAGGGUUUUUCUCUCGCCGUG
9 ->test_266
10 -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
11 ->test_267
12 -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
13 ->test_766
14 -AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG
15 ->test_851
16 -AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG
17 ->test_948
18 -CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG
19 ->test_972
20 -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
1 ->test_159
2 -UUAAGGCGGCCACAGCAGUUGUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACGGCGUUCCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
3 ->test_122
4 -GGCGGCCACAGCGGUUUGAUCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCGGGCGUUCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
5 ->test_264
6 -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
7 ->test_265
8 -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
9 ->test_109
10 -GGCGGCCACAGCGGUAAGCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCAGCGUUCUUCUGGAGUGCGCGAGCCUCUGGAGUUCGCCGCCA
11 ->test_968
12 -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
13 ->test_962
14 -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
15 ->test_62
16 -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
17 -
1 ->test_1010
2 -GAUGAGACGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGU
3 ->test_1018
4 -GGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUCCCCAC
5 ->test_1028
6 -GCCCGGAUAGCUCAGUCGGUAGAGCAUCAGACUUUUAAUCUGAGGGUCCAGGGUUCAAGUCCCUGUUCGGGC
7 ->test_1034
8 -UAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGU
9 ->test_1035
10 -GCCUAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGUCGAGGC
11 ->test_147
12 -AUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
13 ->test_72
14 -GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA
15 ->test_968
16 -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
17 ->test_962
18 -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
19 ->test_62
20 -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
21 -
1 ->test_927
2 -CCCGUGCCCAUAGAGGACCACCCCAUCCAUGCCGAGCUGGGUUUGUGAAACACGCCAAUGAUCCGGACCGCAGGGUCCCAAGUCGGUCAGCGCGGG
3 -.((((((.....(&(.&.(((((((&.(.....).&.)))).&&)))...)&).((&.....&.(((((....))))).&....))...)))))).
4 ->test_170
5 -GCGGGAGUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCCUGUUCGAAUCAGCCCCCGCACCA
6 -(((((&(..(((.......[.....))).(((((.......))))).(((....)))...(&(..]....)&))&)))))....
7 ->test_768
8 -CUAGUCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGUUCGAAGCCGAGGUGUUCUUCCCCCUCGCCGUG
9 -(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&&)...(&(((((&(..&..)&&)))))&)..)
10 ->test_770
11 -CUAGACGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGCCGAAGUUUCGAGGGUUUUUCUCUCGCCGUG
12 -(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&&&((((&(..&..)&))))&)..)
13 ->test_266
14 -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
15 -...................................................................................................
16 ->test_267
17 -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
18 -...................................................................................................
19 ->test_766
20 -AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG
21 -.............................................((((((....))))))
22 ->test_851
23 -AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG
24 -.............................................((((((....))))))
25 ->test_948
26 -CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG
27 -...........................(((((.....)))))
28 ->test_972
29 -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
30 -......(((((........))))..)................
...\ No newline at end of file ...\ No newline at end of file
1 ->test_159
2 -UUAAGGCGGCCACAGCAGUUGUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACGGCGUUCCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
3 -..&&((((((....((&((&(&...((((((...(.....)...))))..))....)&))&))...(&&((((((.((....))))))))&)...)))))).
4 ->test_122
5 -GGCGGCCACAGCGGUUUGAUCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCGGGCGUUCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
6 -((((((....(((((&&(&.&..((((((...(.....)...))))..))....)&)))&&))...(&((((((.((....))))))))&)...)))))).
7 ->test_264
8 -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
9 -...................................................................................................
10 ->test_265
11 -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
12 -...................................................................................................
13 ->test_109
14 -GGCGGCCACAGCGGUAAGCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCAGCGUUCUUCUGGAGUGCGCGAGCCUCUGGAGUUCGCCGCCA
15 -((((((....(((((&&(&..((((((...(.....)...))))..))....)&)))&))...(&&((((((.((....))))))))&)...)))))).
16 ->test_968
17 -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
18 -......(((((........))))..)................
19 ->test_962
20 -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
21 -.....(((((........))))..)................
22 ->test_62
23 -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
24 -(((((((((.((((((....))))..)))))))))))
25 -
26 -
27 -
28 -
1 ->test_1010
2 -GAUGAGACGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGU
3 -..........((((((..((((....))))....))))))..(((..).)).......((((....))))..
4 ->test_1018
5 -GGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUCCCCAC
6 -(((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))....
7 ->test_1028
8 -GCCCGGAUAGCUCAGUCGGUAGAGCAUCAGACUUUUAAUCUGAGGGUCCAGGGUUCAAGUCCCUGUUCGGGC
9 -(((((((..((((........)))).(((((.(...).))))).....(((((.......))))))))))))
10 ->test_1034
11 -UAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGU
12 -.......(((((.(((..(((.........)))..))).....(...((......)).).)))))
13 ->test_1035
14 -GCCUAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGUCGAGGC
15 -(((.......(((((.(((..(((.........)))..))).....(...((......)).).)))))...)))
16 ->test_147
17 -AUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
18 -.....((((..(((.(((((((((....)))))....)))))))))))((((((((((....))))))))))
19 ->test_72
20 -GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA
21 -(((((((................(((..((((.......))))...)))(((((.......))))))))))))....
22 ->test_968
23 -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
24 -......(((((........))))..)................
25 ->test_962
26 -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
27 -.....(((((........))))..)................
28 ->test_62
29 -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
30 -(((((((((.((((((....))))..)))))))))))
31 -
32 -
1 ->test_1JJ2
2 -...************.**.....*.*******.****..***.****************.......****.............*****..***...*****............*******..
3 -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCGGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAACCCGGUUCGCCGCCACC
4 -...((((((....((((((((......((((((...(.....)...))))..))....)))))).))...(((((.....((((((.((....))))))))....)))))...))))))...
5 ->test_1LNG
6 -................************.....................................*****....***....................
7 -UCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUC
8 -..(.((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).)).)
9 ->test_1U6P
10 -.............................*****..............................................**..................*
11 -GGCGGUACUAGUUGAGAAACUAGCUCUGUAUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
12 -.((((..((((((....))))))..)))).....((((..(((.(((((((((....)))))....)))))))))))((((((((((....))))))))))
13 ->test_1Y69
14 -.........***................................................................**.........****...........................
15 -CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG
16 -(((((((((.....((.(((((....((((((...............)))..)))...)))))..))(((.......((.(((((....))))).)).......)))..)))))))))
17 ->test_1YHQ
18 -...***************......********.****..***.****************.......****............******..***...****.............*******..
19 -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC
20 -...((((((....((((((((......((((((...(.....)...))))..))....)))))).))...((.((.....((((((.((....))))))))....)).))...))))))...
21 ->test_2V3C
22 -..............************...........******.****.....**....*********...**********........***....
23 -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
24 -((((..(((((.(((((((((....)))))))))..))))).....(((((.....(((.....(((....))).....)))..)))))..)))).
25 ->test_2ZJQ
26 -......****.**..............********..**.******.******.****..*............*******..***.....******.......*****.....******...
27 -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
28 -.((((((((((.....((.(((((....(((((((...(.....)...))))..)))...)))))..))(((.......((.(((((....))))).)).......)))..)))))))))).
29 ->test_3ADB
30 -*............********.****...............****...*.....................**.**.....*..**....***
31 -GGCCGCCGCCACCGGGGUGGUCCCCGGGCCGGACUUCAGAUCCGGCGCGCCCCGAGUGGGGCGCGGGGUUCAAUUCCCCGCGGCGGCCGCCA
32 -(((((((((..((((((..[.))))))((((((.......))))))(((((((....)))))))((((..]....)))))))))))))....
33 ->test_3CUL
34 -.............................********.**................................................*...
35 -GGAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA
36 -(((((((....)))))).)(((..(((((..........)))))....)))...(((.(((((((((((.......))))))))))).))).
37 ->test_3HHN
38 -...............................................................**...********.**..........................................................
39 -UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUACCCAUUGCACUCCGGGUAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA
40 -((((((((...[[[[[[.))))))))...............[[[[[(...).(.((((((((((((((..........)))))))..((((.]]]]]))))((.((((......)))).)))))))))).]]]]]].
41 ->test_3IWN
42 -....................................................************.*******.....................
43 -CACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACGGCAUUGCACUCCGCCGUAGGUAGCGGGGUUACCGAUGG
44 -((((......((...(((.((....)).)))..[))...(((.((.(((((..((((..........))))))))).].)))))...)).)).
45 ->test_3KTW
46 -...............*************............................*.........*****...****.................
47 -AGAUAGUCGUGGGUUCCCUUUCUGGAGGGAGAGGGAAUUCCACGUUGACCGGGGGAACCGGCCAGGCCCGGAAGGGAGCAACCGUGCCCGGCUAU
48 -..(((..((.(((((((((((((....))))))))))).)).))....(.(((.....(((.....(((....))).....)))..))).).)))
49 ->test_3MUM
50 -....................................................***..*******..**.......................
51 -GUCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG
52 -..(.((......((...((((((....))))))..[))...(((.((((((((..((..........))))))).]))))))...))...)
53 ->test_3MUR
54 -....................................................****.********.**.......................
55 -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG
56 -..(.((......((...((((((....))))))..[))...(((.((((((((...(..........).))))).]))))))...))...)
57 ->test_3NDB
58 -.................................*************..........................**....**********..*********.....................................
59 -GUCUCGUCCCGUGGGGCUCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCGGCGCUCACGGGGGUGCGGGAC
60 -((((((..(((((.(((.(((((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).))))))).).))))).....))))))
61 ->test_3PIO
62 -.....****.***.............********..**.******.*****..****..*............*******..***....*****.*.......****......******..
63 -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
64 -((((((((((.....((.(((((....((((((...............)))..)))...)))))..)).((.......((.(((((....))))).)).......))...))))))))))
65 ->test_3V7E
66 -........*........**............****...................................................*......................................*
67 -GGCUUAUCAAGAGAGGUGGAGGGACUGGCCCGAUGAAACCCGGCAACCACUAGUCUAGCGUCAGCUUCGGCUGACGCUAGGCUAGUGGUGCCAAUUCCUGCAGCGGAAACGUUGAAAGAUGAGCCA
68 -((((((((....(.(((...(((.[.[[)))......))))(((..(((((((((((((((((.(....).))))))))))))))))).)))...(]].](((((....)))))..))))))))).
69 ->test_3W3S
70 -...................**............................*...........................*....................
71 -GGGAGAGGUUGGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUCCCUCUCCCGCC
72 -((((((((..((.((((....))))))((((((.......))))).)((((.((((....)))).)))).(((((.......)))))))))))))...
73 ->test_4UYJ
74 -......*.............************.....................**...........*****.................***...................
75 -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGUAAGGGUUCCCACCCUCGGGCGUGCCUC
76 -(((((..(((((((((...[[[[[.)))))....((((....]]]]].....))))))))...(((((((((..((..(((....)))..))..)))))))))..)))))
77 ->test_4UYK
78 -......*.............************.....................**...........****..........................................***...................
79 -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGAAGCCGCCUCUGUAAGGAGGCGGUGGAGGGUUCCCACCCUCGGGCGUGCCUC
80 -(((((..(((((((((...[[[[[.)))))....((((....]]]]].....))))))))...(((((((((..((..(((...(((((((((....)))))))))...)))..))..)))))))))..)))))
81 ->test_4WF9
82 -...****..**.............*****.....*...***...******.****.*.............*****.*..***....**..***.....***.......****..
83 -UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC
84 -.(..(..(.....((((.((......((((((...(.....)...))))..)).....)).)).))............(............)..............)..)..).
85 ->test_4XCO
86 -..............*************........**........................*******....********................
87 -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
88 -((((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).)))).
89 ->test_4YB1
90 -****.............................................................................*.........
91 -GGGCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACCUCGGUAGGUAGCGGGGUUACCGAUG
92 -...((((......((...((((((....))))))..[))...(((.(((((((...((..........)).)))).]))))))...)).))
1 -{
2 - "1": {
3 - "occurences": 1,
4 - "pdb": [
5 - "1A1T"
6 - ],
7 - "sequence": "GGACUAGCGGAGGCUAGUCC",
8 - "struct2d": "((((((((....))))))))"
9 - },
10 - "10": {
11 - "occurences": 1,
12 - "pdb": [
13 - "1AUD"
14 - ],
15 - "sequence": "GGCAGAGUCCUUCGGGACAUUGCACCUG",
16 - "struct2d": "(.(((.((((....)))).......)))"
17 - },
18 - "100": {
19 - "occurences": 1,
20 - "pdb": [
21 - "1N38"
22 - ],
23 - "sequence": "UUAGC",
24 - "struct2d": "...))"
25 - },
26 - "1000": {
27 - "occurences": 1,
28 - "pdb": [
29 - "4Z4C"
30 - ],
31 - "sequence": "CAAUGUGAC",
32 - "struct2d": "))))))))."
33 - },
34 - "1001": {
35 - "occurences": 1,
36 - "pdb": [
37 - "4Z4D"
38 - ],
39 - "sequence": "UUCACAUUGCCCAAGUCU&U",
40 - "struct2d": ".((((((((.........&."
41 - },
42 - "1002": {
43 - "occurences": 1,
44 - "pdb": [
45 - "4Z4I"
46 - ],
47 - "sequence": "CAAUGUGA",
48 - "struct2d": "))))))))"
49 - },
50 - "1003": {
51 - "occurences": 1,
52 - "pdb": [
53 - "4Z4F"
54 - ],
55 - "sequence": "UUCACAUUGCCCAAGU&U",
56 - "struct2d": ".((((((((.......&."
57 - },
58 - "1004": {
59 - "occurences": 1,
60 - "pdb": [
61 - "4Z7L"
62 - ],
63 - "sequence": "GCAAAAUAACAAGC",
64 - "struct2d": "((..........))"
65 - },
66 - "1005": {
67 - "occurences": 1,
68 - "pdb": [
69 - "4ZDOB"
70 - ],
71 - "sequence": "GCCCGGAUGAUCCUCAGUGGUCUGGGGUGCAG&ACCUGU&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCGG",
72 - "struct2d": "(((((((.(..((((((..[.)))))).((((&..))))&((((.&.))))..((((..]....))))).)))))"
73 - },
74 - "1006": {
75 - "occurences": 1,
76 - "pdb": [
77 - "4ZDPA"
78 - ],
79 - "sequence": "GCCCGGAUGAU&CAGUGGUCUGGGGUGCAGG&ACCUGUA&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCG",
80 - "struct2d": "(((((((.(..&(((..[.)))...((((((&.))))))&((((.&.))))..((((..]....))))).))))"
81 - },
82 - "1007": {
83 - "occurences": 1,
84 - "pdb": [
85 - "4ZDPB"
86 - ],
87 - "sequence": "GCCCGGAUGAU&CAGUGGUCUGGGGUGCAGG&ACCUGUA&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCG",
88 - "struct2d": "(((((((.(..&(((..[.)))...(((((.&..)))))&((((.&.))))..((((..]....))))).))))"
89 - },
90 - "1008": {
91 - "occurences": 1,
92 - "pdb": [
93 - "4ZLD"
94 - ],
95 - "sequence": "UAACUUCUGUGAAGUU",
96 - "struct2d": ".((((((...))))))"
97 - }
98 -}
...\ No newline at end of file ...\ No newline at end of file
1 -{
2 - "1": {
3 - "occurences": 3,
4 - "pdb": [
5 - "1A1T"
6 - ],
7 - "pfam": [
8 - [
9 - "UNK13"
10 - ],
11 - [
12 - "PF00539",
13 - "PF08652"
14 - ],
15 - [
16 - "PF00098"
17 - ]
18 - ],
19 - "sequence": "ACUAGCGGAGGCUAGU",
20 - "struct2d": "((((((....))))))"
21 - },
22 - "10006": {
23 - "occurences": 2,
24 - "pdb": [
25 - "1MNB",
26 - "2A9X"
27 - ],
28 - "pfam": [
29 - [
30 - "PF00539",
31 - "PF08652"
32 - ],
33 - [
34 - "UNK13"
35 - ]
36 - ],
37 - "sequence": "UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA",
38 - "struct2d": "(((.(((((((....))))))((((....))))))))"
39 - }
40 -}
1 -{
2 - "1":{
3 - "occurences":2,
4 - "pdb":[
5 - "1A1T"
6 - ],
7 - "pfam":[
8 - [
9 - "PF00539",
10 - "PF08652"
11 - ],
12 - [
13 - "PF00098"
14 - ]
15 - ],
16 - "sequence":"ACUAGCGGAGGCUAGU",
17 - "struct2d":"((((((....))))))"
18 - },
19 - "10006":{
20 - "occurences":2,
21 - "pdb":[
22 - "1MNB",
23 - "2A9X"
24 - ],
25 - "pfam":[
26 - [
27 - "PF00539",
28 - "PF08652"
29 - ],
30 - [
31 - "UNK13"
32 - ]
33 - ],
34 - "sequence":"UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA",
35 - "struct2d":"(((.(((((((....))))))((((....))))))))"
36 - }
37 -}
38 -
39 -
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 -{
2 - "927": {
3 - "contacts": "..****..**...&..&*****.**&**..*****&******&*******&....&*****&.***.....*******&.*****.....*****",
4 - "occurences": 1,
5 - "pdb": [
6 - "5JVGA",
7 - "5JVGB"
8 - ],
9 - "pfam": [
10 - "PF00181",
11 - "PF00237",
12 - "PF00238",
13 - "PF00252",
14 - "PF00276",
15 - "PF00281",
16 - "PF00297",
17 - "PF00298",
18 - "PF00327",
19 - "PF00347",
20 - "PF00453",
21 - "PF00467",
22 - "PF00468",
23 - "PF00471",
24 - "PF00572",
25 - "PF00573",
26 - "PF00673",
27 - "PF00828",
28 - "PF00829",
29 - "PF00830",
30 - "PF00831",
31 - "PF00861",
32 - "PF01016",
33 - "PF01196",
34 - "PF01245",
35 - "PF01386",
36 - "PF01632",
37 - "PF01783",
38 - "PF03947",
39 - "PF14693",
40 - "PF17136"
41 - ],
42 - "sequence": "CCCGUGCCCAUAG&GG&CCACCCCA&CCAUGCCGA&CUGGGU&GUGAAAC&CGCC&AUGAU&CGGACCGCAGGGUCCC&AGUCGGUCAGCGCGGG",
43 - "struct2d": ".((((((.....(&(.&.(((((((&.(.....).&.)))).&)))...)&).((&.....&.(((((....))))).&....))...))))))."
44 - },
45 -
46 - "170": {
47 - "contacts": "*****&......***....**...****............*****.....******.....&.........&..&...*.****",
48 - "occurences": 1,
49 - "pdb": [
50 - "1WZ2"
51 - ],
52 - "pfam": [
53 - "PF00133",
54 - "PF08264"
55 - ],
56 - "sequence": "GCGGG&GUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCC&GUUCGAAUC&GC&CCCGCACCA",
57 - "struct2d": "(((((&(..(((.......[.....))).(((((.......))))).(((....)))...(&(..]....)&))&)))))...."
58 - },
59 -
60 - "768": {
61 - "contacts": "..*.&..........************.....................**..&.....&*****&...&***&.....&....",
62 - "occurences": 1,
63 - "pdb": [
64 - "4UYJ"
65 - ],
66 - "pfam": [
67 - "PF02290",
68 - "PF05486"
69 - ],
70 - "sequence": "CUAG&CGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGG&CGAAG&CGAGG&GUU&UUC&CCUCG&CGUG",
71 - "struct2d": "(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&(((((&(..&..)&)))))&)..)"
72 - },
73 -
74 - "770": {
75 - "contacts": "..*.&..........************.....................**..&.....&****&...&***&....&....",
76 - "occurences": 1,
77 - "pdb": [
78 - "4UYK"
79 - ],
80 - "pfam": [
81 - "PF02290",
82 - "PF05486"
83 - ],
84 - "sequence": "CUAG&CGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGG&CGAAG&CGAG&GUU&UUC&CUCG&CGUG",
85 - "struct2d": "(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&((((&(..&..)&))))&)..)"
86 - },
87 - "266": {
88 - "contacts": "***************************************************************************************************",
89 - "occurences": 1,
90 - "pdb": [
91 - "2GTT"
92 - ],
93 - "pfam": [
94 - "PF00945"
95 - ],
96 - "sequence": "CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC",
97 - "struct2d": "..................................................................................................."
98 - },
99 - "267": {
100 - "contacts": "***************************************************************************************************",
101 - "occurences": 1,
102 - "pdb": [
103 - "2GTT"
104 - ],
105 - "pfam": [
106 - "PF00945"
107 - ],
108 - "sequence": "CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC",
109 - "struct2d": "..................................................................................................."
110 - },
111 -
112 - "766": {
113 - "contacts": "***************************************************..********",
114 - "occurences": 1,
115 - "pdb": [
116 - "4U7U"
117 - ],
118 - "pfam": [
119 - "PF08798",
120 - "PF09344",
121 - "PF09481",
122 - "PF09485",
123 - "PF09704"
124 - ],
125 - "sequence": "AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG",
126 - "struct2d": ".............................................((((((....))))))"
127 - },
128 -
129 - "851": {
130 - "contacts": "***************************************************...*******",
131 - "occurences": 1,
132 - "pdb": [
133 - "5CD4"
134 - ],
135 - "pfam": [
136 - "PF08798",
137 - "PF09344",
138 - "PF09481",
139 - "PF09485",
140 - "PF09704"
141 - ],
142 - "sequence": "AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG",
143 - "struct2d": ".............................................((((((....))))))"
144 - },
145 -
146 - "948": {
147 - "contacts": "******************************************",
148 - "occurences": 1,
149 - "pdb": [
150 - "5O7H"
151 - ],
152 - "pfam": [
153 - "PF09618"
154 - ],
155 - "sequence": "CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG",
156 - "struct2d": "...........................(((((.....)))))"
157 - },
158 -
159 - "972": {
160 - "contacts": "******************************************",
161 - "occurences": 1,
162 - "pdb": [
163 - "5WLH"
164 - ],
165 - "pfam": [
166 - "UNK81"
167 - ],
168 - "sequence": "GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG",
169 - "struct2d": "......(((((........))))..)................"
170 - },
171 -
172 -
173 -
174 -
175 -
176 -}
...\ No newline at end of file ...\ No newline at end of file
1 -{
2 - "159": {
3 - "contacts": "..&************&**&.&******.*.****..***.****************&..&.****.&...*****...**...*****&....*******",
4 - "occurences": 1,
5 - "pdb": [
6 - "1VQ6"
7 - ],
8 - "pfam": [
9 - "PF00181",
10 - "PF00237",
11 - "PF00238",
12 - "PF00252",
13 - "PF00276",
14 - "PF00281",
15 - "PF00297",
16 - "PF00298",
17 - "PF00327",
18 - "PF00347",
19 - "PF00466",
20 - "PF00467",
21 - "PF00572",
22 - "PF00573",
23 - "PF00673",
24 - "PF00827",
25 - "PF00828",
26 - "PF00831",
27 - "PF00832",
28 - "PF00935",
29 - "PF01157",
30 - "PF01198",
31 - "PF01246",
32 - "PF01248",
33 - "PF01280",
34 - "PF01655",
35 - "PF01780",
36 - "PF01907",
37 - "PF03947",
38 - "PF16906",
39 - "PF17144"
40 - ],
41 - "sequence": "UU&GGCGGCCACAGC&GU&G&GCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&AC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA",
42 - "struct2d": "..&((((((....((&((&(&...((((((...(.....)...))))..))....)&))&))...(&((((((.((....))))))))&)...))))))."
43 - },
44 - "122": {
45 - "contacts": "***************&.&*&*****.******..***.****************&...&.****.&...*****..***...*****&....*******",
46 - "occurences": 1,
47 - "pdb": [
48 - "1Q81",
49 - "1Q82",
50 - "3CPW"
51 - ],
52 - "pfam": [
53 - "PF00181",
54 - "PF00237",
55 - "PF00238",
56 - "PF00252",
57 - "PF00276",
58 - "PF00281",
59 - "PF00297",
60 - "PF00327",
61 - "PF00347",
62 - "PF00466",
63 - "PF00467",
64 - "PF00572",
65 - "PF00573",
66 - "PF00673",
67 - "PF00827",
68 - "PF00828",
69 - "PF00831",
70 - "PF00832",
71 - "PF00935",
72 - "PF01157",
73 - "PF01198",
74 - "PF01246",
75 - "PF01248",
76 - "PF01280",
77 - "PF01655",
78 - "PF01780",
79 - "PF01907",
80 - "PF03947",
81 - "PF16906",
82 - "PF17144"
83 - ],
84 - "sequence": "GGCGGCCACAGCGGU&G&U&CCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&ACC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA",
85 - "struct2d": "((((((....(((((&(&.&..((((((...(.....)...))))..))....)&)))&))...(&((((((.((....))))))))&)...))))))."
86 - },
87 - "264": {
88 - "contacts": "***************************************************************************************************",
89 - "occurences": 1,
90 - "pdb": [
91 - "2GTT"
92 - ],
93 - "pfam": [
94 - "PF00945"
95 - ],
96 - "sequence": "CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC",
97 - "struct2d": "..................................................................................................."
98 - },
99 - "265": {
100 - "contacts": "***************************************************************************************************",
101 - "occurences": 1,
102 - "pdb": [
103 - "2GTT"
104 - ],
105 - "pfam": [
106 - "PF00945"
107 - ],
108 - "sequence": "CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC",
109 - "struct2d": "..................................................................................................."
110 - },
111 - "109": {
112 - "contacts": "***************&.&************..***.****************&...&.****.&...*****...**...*****&....*******",
113 - "occurences": 2,
114 - "pdb": [
115 - "1N8R",
116 - "1W2B"
117 - ],
118 - "pfam": [
119 - "PF00181",
120 - "PF00237",
121 - "PF00238",
122 - "PF00252",
123 - "PF00276",
124 - "PF00281",
125 - "PF00297",
126 - "PF00327",
127 - "PF00347",
128 - "PF00466",
129 - "PF00467",
130 - "PF00572",
131 - "PF00573",
132 - "PF00673",
133 - "PF00827",
134 - "PF00828",
135 - "PF00831",
136 - "PF00832",
137 - "PF00935",
138 - "PF01157",
139 - "PF01198",
140 - "PF01246",
141 - "PF01248",
142 - "PF01280",
143 - "PF01655",
144 - "PF01780",
145 - "PF01907",
146 - "PF03947",
147 - "PF05697",
148 - "PF16906",
149 - "PF17144"
150 - ],
151 - "sequence": "GGCGGCCACAGCGGU&G&CCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&ACC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA",
152 - "struct2d": "((((((....(((((&(&..((((((...(.....)...))))..))....)&)))&))...(&((((((.((....))))))))&)...))))))."
153 - },
154 - "762": {
155 - "contacts": "***************************************************..********",
156 - "occurences": 1,
157 - "pdb": [
158 - "4U7U"
159 - ],
160 - "pfam": [
161 - "PF08798",
162 - "PF09344",
163 - "PF09481",
164 - "PF09485",
165 - "PF09704"
166 - ],
167 - "sequence": "AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG",
168 - "struct2d": ".............................................((((((....))))))"
169 - },
170 - "968": {
171 - "contacts": "******************************************",
172 - "occurences": 1,
173 - "pdb": [
174 - "5WLH"
175 - ],
176 - "pfam": [
177 - "UNK81"
178 - ],
179 - "sequence": "GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG",
180 - "struct2d": "......(((((........))))..)................"
181 - },
182 - "962": {
183 - "contacts": "*****************************************",
184 - "occurences": 2,
185 - "pdb": [
186 - "5W1H"
187 - ],
188 - "pfam": [
189 - "UNK75"
190 - ],
191 - "sequence": "AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG",
192 - "struct2d": ".....(((((........))))..)................"
193 - },
194 - "62": {
195 - "contacts": ".*****..********.......*****.**....**",
196 - "occurences": 1,
197 - "pdb": [
198 - "1I6U"
199 - ],
200 - "pfam": [
201 - "PF00410"
202 - ],
203 - "sequence": "GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC",
204 - "struct2d": "(((((((((.((((((....))))..)))))))))))"
205 - },
206 -}
This diff could not be displayed because it is too large.
1 -{
2 - "1": {
3 - "occurences": 2,
4 - "pdb": [
5 - "1A1T"
6 - ],
7 - "pfam": [
8 - "PF00098"
9 - ],
10 - "sequence": "ACUAGCGGAGGCUAGU",
11 - "struct2d": "((((((....))))))"
12 - },
13 - "100006": {
14 - "occurences": 2,
15 - "pdb": [
16 - "1MNB",
17 - "2A9X"
18 - ],
19 - "pfam": [
20 - "PF00539",
21 - "UNK13"
22 - ],
23 - "sequence": "UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA",
24 - "struct2d": "(((.(((((((....))))))((((....))))))))"
25 - },
26 - "104": {
27 - "occurences": 3,
28 - "pdb": [
29 - "1MNB",
30 - "2A9X"
31 - ],
32 - "pfam": [
33 - "PF00539",
34 - "UNK13"
35 - ],
36 - "sequence": "UCGUG&AGCUCAUUAGCUCCGA",
37 - "struct2d": "(((.(&((((....))))))))"
38 - }
39 -}
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 -{
2 - "103": {
3 - "occurences": 1,
4 - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
5 - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
6 - },
7 - "1103": {
8 - "occurences": 1,
9 - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
10 - "struct2d": "(((((((..((((.....[..))))..(((.........)))......(((((..]....))))))))))))...."
11 - },
12 - "1104": {
13 - "occurences": 1,
14 - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
15 - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
16 - },
17 - "111": {
18 - "occurences": 1,
19 - "sequence": "GCCGAUAUAGCUCAGUUGGUAGAGCAGCGCAUUCGUAAUGCGAAGGUCGUAGGUUCGACUCCUAUUAUCGGCACCA",
20 - "struct2d": "(((((((..((((.....[..))))..((((.......))))......(((((..]....))))))))))))...."
21 - },
22 - "141": {
23 - "occurences": 1,
24 - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
25 - "struct2d": "(((((((..((((.....[..)))).((((.........)))).....(((((..]....))))))))))))...."
26 - },
27 - "16": {
28 - "occurences": 1,
29 - "sequence": "GGAGCGGUAGUUCAGUCGGUUAGAAUACCUGCCUGUCACGCAGGGGGUCGCGGGUUCGAGUCCCGUCCGUUCCGCCA",
30 - "struct2d": "(((((((..((((.....[...)))).(((((.......))))).....(((((..]....))))))))))))...."
31 - },
32 - "281": {
33 - "occurences": 1,
34 - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
35 - "struct2d": ".((((((..((((......[..)))).(((((.......))))).....(((((..]....)))))))))))....."
36 - },
37 - "282": {
38 - "occurences": 1,
39 - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
40 - "struct2d": ".((((((..((((.........)))).(((((.......))))).....(((((.......)))))))))))....."
41 - },
42 - "37": {
43 - "occurences": 1,
44 - "sequence": "GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA",
45 - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
46 - },
47 - "453": {
48 - "occurences": 1,
49 - "sequence": "GGACCGGUAGCCUAGCCAGGACAGGGCGGCGGCCUCCUAAGCCGCAGGUCCGGGGUUCAAAUCCCCGCCGGUCCGC",
50 - "struct2d": "(((((((..((((......[...)))).(((((.......))))).....(((((..]....)))))))))))).."
51 - },
52 - "454": {
53 - "occurences": 1,
54 - "sequence": "GGGUCGCCAAGCCUGGCCAAAGGCGCUGGGCCUAGGACCCAGUCCCGUAGGGGUUCCAGGGUUCAAAUCCCUGCCCC",
55 - "struct2d": "(((..(((.......[.....)))((((((.......))))))(((....)))...(.(((..]....))).))))."
56 - },
57 - "46": {
58 - "occurences": 1,
59 - "sequence": "UUCCUCGUGGCCCAAUGGUCACGGCGUCUGGCUGCGAACCAGAAGAUUCCAGGUUCAAGUCCUGGCGGGGAAGCCA",
60 - "struct2d": "(((((((..(((.....[....))).(((((.......))))).....(((((..]....))))))))))))...."
61 - },
62 - "470": {
63 - "occurences": 1,
64 - "sequence": "GGGCCCGUAGCUUAGCCAGGUCAGAGCGCCCGGCUCAUAACCGGGCGGUCGAGGGUUCGAAUCCCUCCGGGCCCACCA",
65 - "struct2d": "(((((((..((((......[...)))).((((.........)))).....(((((..]....))))))))))))...."
66 - },
67 - "536": {
68 - "occurences": 2,
69 - "sequence": "CUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAA",
70 - "struct2d": "[[[[[(...).(.((((((((((((.....)))))..((((.]]]]]))))((.((((......)))).)))))))))"
71 - },
72 - "645": {
73 - "occurences": 1,
74 - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
75 - "struct2d": "...((((....((.........))...(.(((.......))).).....(((((.......)))))))))......."
76 - },
77 - "671": {
78 - "occurences": 1,
79 - "sequence": "GGGAUGCGUAGGAUAGGUGGGAGCCUGUGAACCCCCGCCUCCGGGUGGGGGGGAGGCGCCGGUGAAAUACCACCCUUCCC",
80 - "struct2d": "((((.(......[..(((((..(((......((((((((....))))))))...)))..........].)))))).))))"
81 - },
82 - "680": {
83 - "occurences": 1,
84 - "sequence": "GGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUC",
85 - "struct2d": "((.((((..[.))))))((((((.......))))).)((((.((((....)))).)))).(((((..]....)))))"
86 - },
87 - "72": {
88 - "occurences": 1,
89 - "sequence": "GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA",
90 - "struct2d": "(((((((................(((..((((.......))))...)))(((((.......))))))))))))...."
91 - },
92 - "955": {
93 - "occurences": 1,
94 - "sequence": "GGGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCCACC",
95 - "struct2d": ".((((.((..((((........)))).(.((.........)).).....(((((.......))))))).))))...."
96 - },
97 - "985": {
98 - "occurences": 1,
99 - "sequence": "UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU",
100 - "struct2d": ".............................................................................."
101 - }
102 -}
1 -{
2 - "103": {
3 - "occurences": 1,
4 - "pdb": [
5 - "1TTT"
6 - ],
7 - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
8 - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
9 - },
10 - "1103": {
11 - "occurences": 1,
12 - "pdb": [
13 - "5HC9"
14 - ],
15 - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
16 - "struct2d": "(((((((..((((.....[..))))..(((.........)))......(((((..]....))))))))))))...."
17 - },
18 - "1104": {
19 - "occurences": 1,
20 - "pdb": [
21 - "5HC9"
22 - ],
23 - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
24 - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
25 - },
26 - "111": {
27 - "occurences": 1,
28 - "pdb": [
29 - "1QF6"
30 - ],
31 - "sequence": "GCCGAUAUAGCUCAGUUGGUAGAGCAGCGCAUUCGUAAUGCGAAGGUCGUAGGUUCGACUCCUAUUAUCGGCACCA",
32 - "struct2d": "(((((((..((((.....[..))))..((((.......))))......(((((..]....))))))))))))...."
33 - },
34 - "141": {
35 - "occurences": 1,
36 - "pdb": [
37 - "1TTT"
38 - ],
39 - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
40 - "struct2d": "(((((((..((((.....[..)))).((((.........)))).....(((((..]....))))))))))))...."
41 - },
42 - "16": {
43 - "occurences": 1,
44 - "pdb": [
45 - "1C0A"
46 - ],
47 - "sequence": "GGAGCGGUAGUUCAGUCGGUUAGAAUACCUGCCUGUCACGCAGGGGGUCGCGGGUUCGAGUCCCGUCCGUUCCGCCA",
48 - "struct2d": "(((((((..((((.....[...)))).(((((.......))))).....(((((..]....))))))))))))...."
49 - },
50 - "281": {
51 - "occurences": 1,
52 - "pdb": [
53 - "2FMT"
54 - ],
55 - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
56 - "struct2d": ".((((((..((((......[..)))).(((((.......))))).....(((((..]....)))))))))))....."
57 - },
58 - "282": {
59 - "occurences": 1,
60 - "pdb": [
61 - "2FMT"
62 - ],
63 - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
64 - "struct2d": ".((((((..((((.........)))).(((((.......))))).....(((((.......)))))))))))....."
65 - },
66 - "37": {
67 - "occurences": 1,
68 - "pdb": [
69 - "1EIY"
70 - ],
71 - "sequence": "GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA",
72 - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
73 - },
74 - "453": {
75 - "occurences": 1,
76 - "pdb": [
77 - "2ZUFB"
78 - ],
79 - "sequence": "GGACCGGUAGCCUAGCCAGGACAGGGCGGCGGCCUCCUAAGCCGCAGGUCCGGGGUUCAAAUCCCCGCCGGUCCGC",
80 - "struct2d": "(((((((..((((......[...)))).(((((.......))))).....(((((..]....)))))))))))).."
81 - },
82 - "454": {
83 - "occurences": 1,
84 - "pdb": [
85 - "2ZZM"
86 - ],
87 - "sequence": "GGGUCGCCAAGCCUGGCCAAAGGCGCUGGGCCUAGGACCCAGUCCCGUAGGGGUUCCAGGGUUCAAAUCCCUGCCCC",
88 - "struct2d": "(((..(((.......[.....)))((((((.......))))))(((....)))...(.(((..]....))).))))."
89 - },
90 - "46": {
91 - "occurences": 1,
92 - "pdb": [
93 - "1F7U"
94 - ],
95 - "sequence": "UUCCUCGUGGCCCAAUGGUCACGGCGUCUGGCUGCGAACCAGAAGAUUCCAGGUUCAAGUCCUGGCGGGGAAGCCA",
96 - "struct2d": "(((((((..(((.....[....))).(((((.......))))).....(((((..]....))))))))))))...."
97 - },
98 - "470": {
99 - "occurences": 1,
100 - "pdb": [
101 - "3AMU"
102 - ],
103 - "sequence": "GGGCCCGUAGCUUAGCCAGGUCAGAGCGCCCGGCUCAUAACCGGGCGGUCGAGGGUUCGAAUCCCUCCGGGCCCACCA",
104 - "struct2d": "(((((((..((((......[...)))).((((.........)))).....(((((..]....))))))))))))...."
105 - },
106 - "536": {
107 - "occurences": 2,
108 - "pdb": [
109 - "3IVKB"
110 - ],
111 - "sequence": "CUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAA",
112 - "struct2d": "[[[[[(...).(.((((((((((((.....)))))..((((.]]]]]))))((.((((......)))).)))))))))"
113 - },
114 - "645": {
115 - "occurences": 1,
116 - "pdb": [
117 - "3QSY"
118 - ],
119 - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
120 - "struct2d": "...((((....((.........))...(.(((.......))).).....(((((.......)))))))))......."
121 - },
122 - "671": {
123 - "occurences": 1,
124 - "pdb": [
125 - "3UMY"
126 - ],
127 - "sequence": "GGGAUGCGUAGGAUAGGUGGGAGCCUGUGAACCCCCGCCUCCGGGUGGGGGGGAGGCGCCGGUGAAAUACCACCCUUCCC",
128 - "struct2d": "((((.(......[..(((((..(((......((((((((....))))))))...)))..........].)))))).))))"
129 - },
130 - "680": {
131 - "occurences": 1,
132 - "pdb": [
133 - "3W3S"
134 - ],
135 - "sequence": "GGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUC",
136 - "struct2d": "((.((((..[.))))))((((((.......))))).)((((.((((....)))).)))).(((((..]....)))))"
137 - },
138 - "72": {
139 - "occurences": 1,
140 - "pdb": [
141 - "1J2B"
142 - ],
143 - "sequence": "GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA",
144 - "struct2d": "(((((((................(((..((((.......))))...)))(((((.......))))))))))))...."
145 - },
146 - "955": {
147 - "occurences": 1,
148 - "pdb": [
149 - "4X0B"
150 - ],
151 - "sequence": "GGGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCCACC",
152 - "struct2d": ".((((.((..((((........)))).(.((.........)).).....(((((.......))))))).))))...."
153 - },
154 - "985": {
155 - "occurences": 1,
156 - "pdb": [
157 - "4XJN"
158 - ],
159 - "sequence": "UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU",
160 - "struct2d": ".............................................................................."
161 - }
162 -}
This diff could not be displayed because it is too large.
1 ->test_CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE__PDB_00376
2 -.......***.......................******..................................
3 -GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
4 -((((((..(((.........)))((((((((...))))))))...(((((.......))))))))))).....
5 ->test_GUANINE_RIBOSWITCH_U22C,_A52G_MUTANT_BOUND_TO_HYPOXANTHINE__PDB_01023
6 -............................*********.............................**
7 -GGACAUACAAUCGCGUGGAUAUGGCACGCAAGUUUCUGCCGGGCACCGUAAAUGUCCGACUAUGUCCa
8 -(((((((...(((((((.[[..[[)))))))........((((((]]...]]))))))..))))))).
9 ->test_SOLUTION_STRUCTURE_OF_THE_P2B-P3_PSEUDOKNOT_FROM_HUMAN_TELOMERASE_RNA__PDB_00857
10 -.............................*****.............
11 -GGGCUGUUUUUCUCGCUGACUUUCAGCCCCAAACAAAAAAGUCAGCA
12 -[[[[[[........(((((((((]]]]]]........))))))))).
1 +The motif library used with --contacts is particular. It was provided by Isaure Chauvot de Beauchêne from the LORIA
2 +laboratory. These motifs are made up of RNA fragments linked to proteins.
3 +==================================================================================================================
4 +
5 +Several versions of this motif library have been provided, but the most complete is the latest: 'motifs_06-06-2021.json'.
6 +The current scripts were created based on this file, and do not work with the older libraries.
7 +
8 +There are also 2 benchmark files, also in json format: 'benchmark_16-06-2021.json' and 'benchmark_16-07-2021.json'.
9 +They contain complete RNA sequences that bind to a protein; the first one contains only 33 RNAs, and the second one
10 +contains 130 RNAs.
11 +
12 +The benchmark.dbn and benchmark.txt were created based on the 'benchmark_16-07-2021.json'.
13 +They are mostly used for the Isaure_benchmark.py script and scripts from the 'scripts' directory.
14 +
15 +The motifs_final.json file is obtained by executing the count_pattern.cpp script (in the 'scripts' directory) on
16 +the 'motifs_06-06-2021.json' motifs file.
17 +This script counts the number of "occurrences" of each motif: we consider that if the sequence of motif A
18 +is included in motif B, then for each inclusion of B we also have an inclusion of A. And vice versa.
19 +
20 +The motif library used by BiORSEO is the one in the 'bibliotheque_a_lire' directory. It should contain only
21 +the json file we wish BiORSEO to use for its prediction. That's why you shouldn't put any other type of file there!
22 +
23 +
24 +
25 +
26 +
27 +
...@@ -22341,23 +22341,6 @@ ...@@ -22341,23 +22341,6 @@
22341 "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA", 22341 "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA",
22342 "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...." 22342 "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...."
22343 }, 22343 },
22344 - "977": {
22345 - "contacts": "******************..&****************.**************&*************&*",
22346 - "occurences": 1,
22347 - "pdb": [
22348 - "5XBL"
22349 - ],
22350 - "pfam": [
22351 - [
22352 - "PF16592",
22353 - "PF16593",
22354 - "PF16595",
22355 - "PF13395"
22356 - ]
22357 - ],
22358 - "sequence": "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U",
22359 - "struct2d": "..........(((((...((&)).....)))))..(.(..)..).......(&).....(.....)&."
22360 - },
22361 "978": { 22344 "978": {
22362 "contacts": "*****", 22345 "contacts": "*****",
22363 "occurences": 9, 22346 "occurences": 9,
......
...@@ -22341,23 +22341,6 @@ ...@@ -22341,23 +22341,6 @@
22341 "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA", 22341 "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA",
22342 "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...." 22342 "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...."
22343 }, 22343 },
22344 - "977": {
22345 - "contacts": "******************..&****************.**************&*************&*",
22346 - "occurences": 1,
22347 - "pdb": [
22348 - "5XBL"
22349 - ],
22350 - "pfam": [
22351 - [
22352 - "PF16592",
22353 - "PF16593",
22354 - "PF16595",
22355 - "PF13395"
22356 - ]
22357 - ],
22358 - "sequence": "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U",
22359 - "struct2d": "..........(((((...((&)).....)))))..(.(..)..).......(&).....(.....)&."
22360 - },
22361 "978": { 22344 "978": {
22362 "contacts": "*****", 22345 "contacts": "*****",
22363 "occurences": 9, 22346 "occurences": 9,
......
...@@ -6,6 +6,9 @@ import seaborn as sns ...@@ -6,6 +6,9 @@ import seaborn as sns
6 import pandas as pd 6 import pandas as pd
7 import matplotlib.pylab as plt 7 import matplotlib.pylab as plt
8 8
9 +# Retrieve for each rna the best value for MEA and compare this energy value with the one obtained with
10 +# RNAeval and RNAfold from the ViennaRNA Package 2.0 (Ronny Lorenz et al., 2011)
11 +# After getting those values, it creates a figure.
9 def get_result_MEA(filename): 12 def get_result_MEA(filename):
10 ext = "json_pmE" 13 ext = "json_pmE"
11 file2 = open( "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/" + filename + ext, "r") 14 file2 = open( "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/" + filename + ext, "r")
......
1 -from math import sqrt, ceil
2 -import numpy as np
3 -import matplotlib.pyplot as plt
4 -
5 -file = open("/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn", "r")
6 -name = file.readline()
7 -rna = file.readline()
8 -twod = file.readline()
9 -contacts = file.readline()
10 -length = len(rna)
11 -nb_ctc = contacts.count('*')
12 -print("--------------------------------------------------------")
13 -
14 -ctc_max = nb_ctc
15 -ctc_min = nb_ctc
16 -
17 -np_lgt = []
18 -np_lgt.append(length)
19 -
20 -np_ctc = []
21 -np_ctc.append(nb_ctc)
22 -
23 -np = []
24 -np.append([length, nb_ctc])
25 -
26 -while name:
27 - print(contacts)
28 - print(length)
29 - print(nb_ctc)
30 - print("--------------------------------------------------------")
31 -
32 - name = file.readline()
33 - rna = file.readline()
34 - length = len(rna)
35 - if length != 0 :
36 - np_lgt.append(length)
37 - twod = file.readline()
38 - contacts = file.readline()
39 - nb_ctc = contacts.count('*')
40 - if nb_ctc != 0:
41 - np_ctc.append(nb_ctc)
42 - np.append([length, nb_ctc])
43 - if nb_ctc > ctc_max:
44 - ctc_max = nb_ctc
45 - if nb_ctc < ctc_min and nb_ctc != 0:
46 - ctc_min = nb_ctc
47 -file.close()
48 -print(np_lgt)
49 -print(np_ctc)
50 -print(np)
51 -
52 -x = np_lgt
53 -y = np_ctc
54 -
55 -index = np_ctc.index(ctc_max)
56 -index2 = np_ctc.index(ctc_min)
57 -
58 -plt.scatter(x, y, c = 'blue')
59 -plt.annotate("(" + str(np_lgt[index]) + "," + str(ctc_max) + ")", (np_lgt[index], ctc_max),c ='red')
60 -plt.scatter(np_lgt[index], ctc_max,c = 'red')
61 -plt.annotate("(" + str(np_lgt[index2]) + "," + str(ctc_min) + ")", (np_lgt[index2], ctc_min),c ='green')
62 -plt.scatter(np_lgt[index2], ctc_min,c = 'green')
63 -
64 -plt.xlabel('longeur de l\'arn')
65 -plt.ylabel('nombre de contacts')
66 -plt.savefig('stats.png')
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
11 using namespace std; 11 using namespace std;
12 using json = nlohmann::json; 12 using json = nlohmann::json;
13 13
14 +//Count the number of '&' in the motif sequence
14 size_t count_delimiter(string& seq) { 15 size_t count_delimiter(string& seq) {
15 size_t count = 0; 16 size_t count = 0;
16 for(uint i = 0; i < seq.size(); i++) { 17 for(uint i = 0; i < seq.size(); i++) {
...@@ -22,6 +23,10 @@ size_t count_delimiter(string& seq) { ...@@ -22,6 +23,10 @@ size_t count_delimiter(string& seq) {
22 return count; 23 return count;
23 } 24 }
24 25
26 +/*
27 +If there is a '&' in the motif sequence in the field 'sequence' but not in the field 'contacts',
28 +the script puts a '&' at the same position in the field 'contacts' as in the field 'sequence'.
29 +*/
25 void add_delimiter(const string& jsonfile, const string& jsonoutfile) { 30 void add_delimiter(const string& jsonfile, const string& jsonoutfile) {
26 std::ifstream lib(jsonfile); 31 std::ifstream lib(jsonfile);
27 32
...@@ -77,13 +82,9 @@ void add_delimiter(const string& jsonfile, const string& jsonoutfile) { ...@@ -77,13 +82,9 @@ void add_delimiter(const string& jsonfile, const string& jsonoutfile) {
77 82
78 int main() 83 int main()
79 { 84 {
80 - //183 85 + string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_06-06-2021.json";
81 - //cout << "------------------BEGIN-----------------" << endl; 86 + string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_tmp.json";
82 - string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json";
83 - string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_tmp.json";
84 add_delimiter(jsonfile, out); 87 add_delimiter(jsonfile, out);
85 -
86 - //cout << "------------------END-----------------" << endl;
87 return 0; 88 return 0;
88 } 89 }
89 90
......
...@@ -11,6 +11,12 @@ ...@@ -11,6 +11,12 @@
11 using namespace std; 11 using namespace std;
12 using json = nlohmann::json; 12 using json = nlohmann::json;
13 13
14 +/*
15 +This script counts the number of "occurrences" of each motif.
16 +So we consider that if the sequence of pattern A is included in pattern B,
17 +then for each inclusion of B we also have an inclusion of A. And vice versa.
18 +*/
19 +
14 //Return true if the first sequence seq1 is included in the second sequence seq2 20 //Return true if the first sequence seq1 is included in the second sequence seq2
15 //if not return false 21 //if not return false
16 int is_contains(string& seq1, string& seq2) { 22 int is_contains(string& seq1, string& seq2) {
...@@ -38,6 +44,8 @@ int is_contains(string& seq1, string& seq2) { ...@@ -38,6 +44,8 @@ int is_contains(string& seq1, string& seq2) {
38 44
39 //If we find the sequence and structure of pattern A in pattern B, we have to concatenate the pfam lists of A and B, 45 //If we find the sequence and structure of pattern A in pattern B, we have to concatenate the pfam lists of A and B,
40 //remove the duplicates, assign this new list of pfam lists to A, and assign as occurrence to A the size of this list. 46 //remove the duplicates, assign this new list of pfam lists to A, and assign as occurrence to A the size of this list.
47 +//The pattern A is counted only once in every other pattern, i.e. even if the sequence of A is found several times in B,
48 +// it will be added only once in the occurrences of A.
41 void counting_occurences(const string& jsonfile, const string& jsonoutfile) { 49 void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
42 std::ifstream lib(jsonfile); 50 std::ifstream lib(jsonfile);
43 std::ifstream lib2(jsonfile); 51 std::ifstream lib2(jsonfile);
...@@ -73,14 +81,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { ...@@ -73,14 +81,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
73 if (!test.compare("pfam")) { 81 if (!test.compare("pfam")) {
74 vector<vector<string>> tab = it2.value(); 82 vector<vector<string>> tab = it2.value();
75 list_pfams = tab; 83 list_pfams = tab;
76 - /*set<set<string>>::iterator iit;
77 - set<string>::iterator iit2;
78 - for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) {
79 - for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) {
80 - cout << *iit2 << endl;
81 - }
82 - cout << endl << endl;
83 - }*/
84 } else if (!test.compare("sequence")) { 84 } else if (!test.compare("sequence")) {
85 //cout << "sequence: " << it2.value() << endl; 85 //cout << "sequence: " << it2.value() << endl;
86 sequence = it2.value(); 86 sequence = it2.value();
...@@ -124,7 +124,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { ...@@ -124,7 +124,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
124 new_id[test] = it2.value(); 124 new_id[test] = it2.value();
125 } 125 }
126 } 126 }
127 - //cout << "-------begin---------" << endl;
128 127
129 for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) { 128 for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) {
130 string id2 = it3.key(); 129 string id2 = it3.key();
...@@ -142,22 +141,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { ...@@ -142,22 +141,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
142 if (!test.compare("pfam")) { 141 if (!test.compare("pfam")) {
143 vector<vector<string>> tab = it4.value(); 142 vector<vector<string>> tab = it4.value();
144 list_pfams2 = tab; 143 list_pfams2 = tab;
145 - /*for (uint k = 0; k < tab2.size(); k++) {
146 - for (uint l = 0; l < tab2[k].size(); l++) {
147 - pfams2.insert(tab2[k][l]);
148 - }
149 - list_pfams2.insert(pfams);
150 - pfams2.clear();
151 - }*/
152 -
153 - /*set<set<string>>::iterator iit;
154 - set<string>::iterator iit2;
155 - for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) {
156 - for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) {
157 - cout << *iit2 << endl;
158 - }
159 - cout << endl << endl;
160 - }*/
161 } else if (!test.compare("occurences")) { 144 } else if (!test.compare("occurences")) {
162 occurences2 = it4.value(); 145 occurences2 = it4.value();
163 //cout << "occurences2: "<< occurences2 << endl; 146 //cout << "occurences2: "<< occurences2 << endl;
...@@ -216,7 +199,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { ...@@ -216,7 +199,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
216 199
217 } 200 }
218 } 201 }
219 - //cout << "----end----" << endl;
220 //} 202 //}
221 } 203 }
222 if(flag) { 204 if(flag) {
...@@ -242,23 +224,12 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { ...@@ -242,23 +224,12 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
242 //cout << endl;*/ 224 //cout << endl;*/
243 } 225 }
244 226
245 -
246 - /*for(uint ii = 0; ii < list_pfams.size(); ii++) {
247 - for (uint jj = 0; jj < list_pfams[ii].size(); jj++) {
248 - cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl;
249 - }
250 - }*/
251 227
252 new_id["occurences"] = list_pfams.size(); 228 new_id["occurences"] = list_pfams.size();
253 - new_id["pfam"] = list_pfams; 229 + new_id["pfam"] = list_pfams;
254 -
255 - //cout << "-------ending---------" << endl;
256 new_motif[id] = new_id; 230 new_motif[id] = new_id;
257 new_id.clear(); 231 new_id.clear();
258 - //cout << "valeur: " << ite << endl; 232 +
259 - /*for (uint i = 0; i < tab_struc.size() ; i++) {
260 - cout << "tab_struc[" << i << "]: " << tab_struc[i] << endl << endl;
261 - } */
262 } 233 }
263 outfile << new_motif.dump(4) << endl; 234 outfile << new_motif.dump(4) << endl;
264 outfile.close(); 235 outfile.close();
...@@ -267,13 +238,11 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { ...@@ -267,13 +238,11 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
267 238
268 int main() 239 int main()
269 { 240 {
270 - //183 241 +
271 - //cout << "------------------BEGIN-----------------" << endl; 242 + string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_06-06-2021.json";
272 - string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json"; 243 + string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_final.json";
273 - string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
274 counting_occurences(jsonfile, out); 244 counting_occurences(jsonfile, out);
275 245
276 - //cout << "------------------END-----------------" << endl;
277 return 0; 246 return 0;
278 } 247 }
279 248
......
1 +#include <iostream>
2 +#include <sstream>
3 +#include <fstream>
4 +#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
5 +#include <typeinfo>
6 +#include <set>
7 +#include <algorithm>
8 +#include <cstdio>
9 +#include <vector>
10 +
11 +using namespace std;
12 +using json = nlohmann::json;
13 +
14 +/*
15 +Create a .fa (FASTA) file for each of the sequences inside the benchmark in json format.
16 +Also create a .dbn and a .txt file that list the name, sequence, 2d structure and contacts of every benchmark sequence without a '&' delimiter.
17 +Those files are useful for the Isaure_benchmark.py script.
18 +*/
19 +void create_files(const string& jsonmotifs) {
20 + std::ifstream lib(jsonmotifs);
21 + string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/"; // directory receiving one .fa file per chain
22 + string list = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"; // hard-coded .txt output
23 + string dbn = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; // hard-coded .dbn output
24 + std::ofstream outlist (list);
25 + std::ofstream outdbn (dbn);
26 + json js = json::parse(lib);
27 + uint count = 0; // number of entries written to the .txt/.dbn files
28 +
29 + for (auto it = js.begin(); it != js.end(); ++it) { // one top-level entry per id
30 + string id = it.key();
31 + string name, seq, contacts, structure; // NOTE(review): this 'name' is shadowed by the one declared in the chain loop and is never used
32 + for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
33 + string chain = it2.key();
34 + if (chain.compare("pfams") != 0) { // every key other than "pfams" is handled as a chain
35 + string name = id + "_" + chain;
36 + string filename = fasta + name + ".fa";
37 + std::ofstream outfasta (filename);
38 + outfasta << ">test_" << name << endl; // FASTA header line
39 + for (auto it3 = js[id][chain].begin(); it3 != js[id][chain].end(); ++it3) {
40 + string field = it3.key();
41 + if (!field.compare("sequence")) {
42 + seq = it3.value();
43 + outfasta << seq.substr(0,seq.size()) << endl; // substr(0, size()) is the whole sequence
44 + outfasta.close(); // the .fa file is closed as soon as the sequence has been written
45 +
46 + } else if (!field.compare("contacts")) {
47 + contacts = it3.value();
48 +
49 + } else if (!field.compare("struct2d")) {
50 + structure = it3.value();
51 + }
52 + }
53 + if(seq.find('&') == string::npos) { // only sequences without a '&' delimiter are listed; NOTE(review): seq/contacts/structure keep the previous chain's value if a field is missing
54 + outlist << ">test_" << name << endl;
55 + outdbn << "test_" << name << "." << endl; // .dbn name line: no '>' prefix, trailing '.'
56 + outlist << contacts << endl; // .txt order: name, contacts, sequence, structure
57 + outdbn << seq << endl; // .dbn order: name, sequence, structure, contacts
58 + outdbn << structure << endl;
59 + outdbn << contacts << endl;
60 + outlist << seq << endl;
61 + outlist << structure << endl;
62 + count++;
63 + }
64 + }
65 + }
66 + }
67 + cout << count << " sequences en tout" << endl; // prints the total number of listed sequences
68 + lib.close();
69 + outlist.close();
70 + outdbn.close();
71 +}
72 +
73 +int main()
74 +{
75 + string path = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/";
76 + string jsonbm = path + "modules/ISAURE/benchmark_16-07-2021.json";
77 + create_files(jsonbm);
78 +
79 + return 0;
80 +}
81 +
...@@ -12,6 +12,10 @@ ...@@ -12,6 +12,10 @@
12 using namespace std; 12 using namespace std;
13 using json = nlohmann::json; 13 using json = nlohmann::json;
14 14
15 +/*
16 +This script is used to create a new motif library that excludes any motif containing the same pdb as the sequence given as input for prediction
17 +with BiORSEO.
18 +*/
15 void delete_redundant_pdb(const string& jsonlibrary, const string& name, const string& jsonoutfile) { 19 void delete_redundant_pdb(const string& jsonlibrary, const string& name, const string& jsonoutfile) {
16 std::ifstream lib(jsonlibrary); 20 std::ifstream lib(jsonlibrary);
17 21
...@@ -51,8 +55,8 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& name, const s ...@@ -51,8 +55,8 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& name, const s
51 55
52 int main(int argc, char** argv) 56 int main(int argc, char** argv)
53 { 57 {
54 - string jsonlibrary = "/local/local/BiorseoNath/data/modules/ISAURE/Motifs_version_initiale/motifs_final.json"; 58 + string jsonlibrary = "/local/local/BiorseoNath/data/modules/ISAURE/motifs_final.json";
55 - string out = "/local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; 59 + string out = "/local/local/BiorseoNath/data/modules/ISAURE/bibliotheque_a_lire/motifs_final.json";
56 string name = argv[1]; 60 string name = argv[1];
57 delete_redundant_pdb(jsonlibrary, name, out); 61 delete_redundant_pdb(jsonlibrary, name, out);
58 return 0; 62 return 0;
......
...@@ -12,18 +12,23 @@ using namespace std; ...@@ -12,18 +12,23 @@ using namespace std;
12 using json = nlohmann::json; 12 using json = nlohmann::json;
13 13
14 /* 14 /*
15 -That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from. 15 +That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from (with the same pdb).
16 */ 16 */
17 17
18 +//To store the pdb and the sequence in the benchmark file. Also store the corresponding motif id and components based on this sequence.
18 struct data { 19 struct data {
20 + //the pdb code (in the name of the sequence)
19 string pdb; 21 string pdb;
22 + //the complete sequence with this pdb code
20 string seq_pdb; 23 string seq_pdb;
24 + //the id of the motif corresponding to this pdb in the library
21 string id; 25 string id;
26 + //the module sequence with the components of this motif with the above id
22 string cmp; 27 string cmp;
23 }; 28 };
24 typedef struct data data; 29 typedef struct data data;
25 30
26 - 31 +//returns the list of pdb codes and the corresponding information from the benchmark file.
27 vector<data> get_list_pdb_benchmark(const string& benchmark) { 32 vector<data> get_list_pdb_benchmark(const string& benchmark) {
28 33
29 fstream bm(benchmark); 34 fstream bm(benchmark);
...@@ -57,6 +62,7 @@ string trim(string str) { ...@@ -57,6 +62,7 @@ string trim(string str) {
57 return str; 62 return str;
58 } 63 }
59 64
65 +//store the corresponding id and motif to the sequence from the benchmark file
60 data find_id_pattern(string& pdb_pattern, const string& benchmark) { 66 data find_id_pattern(string& pdb_pattern, const string& benchmark) {
61 vector<data> l = get_list_pdb_benchmark(benchmark); 67 vector<data> l = get_list_pdb_benchmark(benchmark);
62 int size = l.size(); 68 int size = l.size();
...@@ -71,6 +77,8 @@ data find_id_pattern(string& pdb_pattern, const string& benchmark) { ...@@ -71,6 +77,8 @@ data find_id_pattern(string& pdb_pattern, const string& benchmark) {
71 return data(); 77 return data();
72 } 78 }
73 79
80 +//Create an array of data ('association'), which consists of each pdb of the benchmark file
81 +// with the associated pattern from this sequence.
74 vector<data> find_id(const string& bibli, const string& benchmark) { 82 vector<data> find_id(const string& bibli, const string& benchmark) {
75 ifstream lib(bibli); 83 ifstream lib(bibli);
76 json js = json::parse(lib); 84 json js = json::parse(lib);
...@@ -112,6 +120,7 @@ vector<data> find_id(const string& bibli, const string& benchmark) { ...@@ -112,6 +120,7 @@ vector<data> find_id(const string& bibli, const string& benchmark) {
112 return association; 120 return association;
113 } 121 }
114 122
123 +//check if the motif is found matching with a complete sequence from a benchmark file.
115 bool does_it_match(const string& seq, const string& seq_motif) { 124 bool does_it_match(const string& seq, const string& seq_motif) {
116 size_t found = seq_motif.find("&"); 125 size_t found = seq_motif.find("&");
117 size_t size = seq_motif.size(); 126 size_t size = seq_motif.size();
...@@ -150,6 +159,7 @@ bool does_it_match(const string& seq, const string& seq_motif) { ...@@ -150,6 +159,7 @@ bool does_it_match(const string& seq, const string& seq_motif) {
150 return false; 159 return false;
151 } 160 }
152 161
162 +//return the list of motif ids that didn't match any complete sequence other than the one they came from.
153 vector<string> select_not_motif(const string& bibli, const string& benchmark) { 163 vector<string> select_not_motif(const string& bibli, const string& benchmark) {
154 vector<string> selection; 164 vector<string> selection;
155 vector<data> association = find_id(bibli, benchmark); 165 vector<data> association = find_id(bibli, benchmark);
...@@ -187,8 +197,8 @@ vector<string> select_not_motif(const string& bibli, const string& benchmark) { ...@@ -187,8 +197,8 @@ vector<string> select_not_motif(const string& bibli, const string& benchmark) {
187 197
188 int main() 198 int main()
189 { 199 {
190 - string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; 200 + string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_final.json";
191 - string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; 201 + string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/benchmark.dbn";
192 202
193 /*vector<data> v = get_list_pdb_benchmark(benchmark); 203 /*vector<data> v = get_list_pdb_benchmark(benchmark);
194 for (data d : v) { 204 for (data d : v) {
......