Showing
62 changed files
with
161 additions
and
1757 deletions
This diff is collapsed. Click to expand it.
... | @@ -65,9 +65,9 @@ Check the file [INSTALL.md](INSTALL.md) for installation instructions. | ... | @@ -65,9 +65,9 @@ Check the file [INSTALL.md](INSTALL.md) for installation instructions. |
65 | ``` | 65 | ``` |
66 | Usage: You must provide: | 66 | Usage: You must provide: |
67 | 1) a FASTA input file with -i, | 67 | 1) a FASTA input file with -i, |
68 | - 2) a module type with --rna3dmotifs, --carnaval or --3dmotifatlas | 68 | + 2) a module type with --rna3dmotifs, --carnaval, --3dmotifatlas or --contacts |
69 | 3) one module placement method in { --patternmatch, --jar3d, --bayespairing } | 69 | 3) one module placement method in { --patternmatch, --jar3d, --bayespairing } |
70 | - 4) one scoring function with --func A, B, C or D | 70 | + 4) one scoring function with --func A, B, C, D, E or F |
71 | 71 | ||
72 | If you are not using the Docker image: | 72 | If you are not using the Docker image: |
73 | 5) --modules-path, --biorseo-dir and (--jar3d-exec or --bypdir) | 73 | 5) --modules-path, --biorseo-dir and (--jar3d-exec or --bypdir) |
... | @@ -79,6 +79,7 @@ Options: | ... | @@ -79,6 +79,7 @@ Options: |
79 | --rna3dmotifs Use DESC modules from Djelloul & Denise, 2008 | 79 | --rna3dmotifs Use DESC modules from Djelloul & Denise, 2008 |
80 | --carnaval Use RIN modules from Reinharz & al, 2018 | 80 | --carnaval Use RIN modules from Reinharz & al, 2018 |
81 | --3dmotifatlas Use the HL and IL loops from BGSU's 3D Motif Atlas (updated) | 81 | --3dmotifatlas Use the HL and IL loops from BGSU's 3D Motif Atlas (updated) |
82 | +--contacts Use the library of motifs created from RNA sequences linked to proteins, provided by I. Chauvot de Beauchene of the LORIA laboratory | ||
82 | -p [ --patternmatch ] Use regular expressions to place modules in the sequence (requires --rna3dmotifs or --carnaval) | 83 | -p [ --patternmatch ] Use regular expressions to place modules in the sequence (requires --rna3dmotifs or --carnaval) |
83 | -j [ --jar3d ] Use JAR3D to place modules in the sequence (requires --3dmotifatlas) | 84 | -j [ --jar3d ] Use JAR3D to place modules in the sequence (requires --3dmotifatlas) |
84 | -b [ --bayespairing ] Use BayesPairing2 to place modules in the sequence (requires --rna3dmotifs or --3dmotifatlas) | 85 | -b [ --bayespairing ] Use BayesPairing2 to place modules in the sequence (requires --rna3dmotifs or --3dmotifatlas) |
... | @@ -123,5 +124,6 @@ The allowed module/placement-method/function combinations are: | ... | @@ -123,5 +124,6 @@ The allowed module/placement-method/function combinations are: |
123 | --rna3dmotifs A. B. A. B. C. D. | 124 | --rna3dmotifs A. B. A. B. C. D. |
124 | --3dmotifatlas A. B. C. D. A. B. C. D. | 125 | --3dmotifatlas A. B. C. D. A. B. C. D. |
125 | --carnaval A. B. | 126 | --carnaval A. B. |
127 | +--contacts E. F. | ||
126 | 128 | ||
127 | ``` | 129 | ``` | ... | ... |
... | @@ -381,7 +381,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool | ... | @@ -381,7 +381,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool |
381 | break; | 381 | break; |
382 | 382 | ||
383 | case 'E': | 383 | case 'E': |
384 | - // Fonction f1E | 384 | + // Fonction f1E |
385 | for (const Component& c : insertion_sites_[i].comp) sum_k += c.k; | 385 | for (const Component& c : insertion_sites_[i].comp) sum_k += c.k; |
386 | obj1 += IloNum(sum_k * insertion_sites_[i].contact_ * insertion_sites_[i].tx_occurrences_) * insertion_dv_[index_of_first_components[i]] ; | 386 | obj1 += IloNum(sum_k * insertion_sites_[i].contact_ * insertion_sites_[i].tx_occurrences_) * insertion_dv_[index_of_first_components[i]] ; |
387 | break; | 387 | break; |
... | @@ -395,6 +395,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool | ... | @@ -395,6 +395,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool |
395 | 395 | ||
396 | } | 396 | } |
397 | } | 397 | } |
398 | + //Stacking energy parameter matrix | ||
398 | double energy[7][7] = { | 399 | double energy[7][7] = { |
399 | {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, | 400 | {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, |
400 | {0.0, 1.1, 2.1, 2.2, 1.4, 0.9, 0.6}, | 401 | {0.0, 1.1, 2.1, 2.2, 1.4, 0.9, 0.6}, |
... | @@ -408,7 +409,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool | ... | @@ -408,7 +409,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool |
408 | obj2 = IloExpr(env_); | 409 | obj2 = IloExpr(env_); |
409 | switch (obj_function2_nbr_) { | 410 | switch (obj_function2_nbr_) { |
410 | case 'a': | 411 | case 'a': |
411 | - // Define the MFE: | 412 | + // Define the MFE (Minimum Free Energy): |
412 | for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) { | 413 | for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) { |
413 | for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) { | 414 | for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) { |
414 | if (get_xij_index(u, v) != rna_.get_RNA_length() * rna_.get_RNA_length() + 1) { | 415 | if (get_xij_index(u, v) != rna_.get_RNA_length() * rna_.get_RNA_length() + 1) { |
... | @@ -429,7 +430,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool | ... | @@ -429,7 +430,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool |
429 | } | 430 | } |
430 | break; | 431 | break; |
431 | } | 432 | } |
432 | - //std::cout << "\n fin \n"; | ||
433 | } | 433 | } |
434 | 434 | ||
435 | MOIP::~MOIP() { env_.end(); } | 435 | MOIP::~MOIP() { env_.end(); } |
... | @@ -705,7 +705,6 @@ void MOIP::define_problem_constraints(string& source) | ... | @@ -705,7 +705,6 @@ void MOIP::define_problem_constraints(string& source) |
705 | 705 | ||
706 | SecondaryStructure MOIP::solve_objective(int o, double min, double max) | 706 | SecondaryStructure MOIP::solve_objective(int o, double min, double max) |
707 | { | 707 | { |
708 | - //cout << endl << "BEGIN" << endl; | ||
709 | // Solves one of the objectives, under constraint that the other should be in [min, max] | 708 | // Solves one of the objectives, under constraint that the other should be in [min, max] |
710 | 709 | ||
711 | if (min > max) { | 710 | if (min > max) { |
... | @@ -755,17 +754,11 @@ SecondaryStructure MOIP::solve_objective(int o, double min, double max) | ... | @@ -755,17 +754,11 @@ SecondaryStructure MOIP::solve_objective(int o, double min, double max) |
755 | } | 754 | } |
756 | 755 | ||
757 | // if (verbose_) cout << "\t\t>retrieving basepairs of the result secondary structure..." << endl; | 756 | // if (verbose_) cout << "\t\t>retrieving basepairs of the result secondary structure..." << endl; |
758 | - //cout << "y(2,80): " << cplex_.getValue(y(u, v)) << endl; | ||
759 | for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) | 757 | for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) |
760 | for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) | 758 | for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) |
761 | if (allowed_basepair(u, v)) | 759 | if (allowed_basepair(u, v)) |
762 | if (cplex_.getValue(y(u, v)) > 0.5) { | 760 | if (cplex_.getValue(y(u, v)) > 0.5) { |
763 | best_ss.set_basepair(u, v); | 761 | best_ss.set_basepair(u, v); |
764 | - /*if (u == 5 && v == 26) { | ||
765 | - cout << endl << "(" << u << "," << v << "): " << endl; | ||
766 | - cout << best_ss.to_string() << endl; | ||
767 | - cout << "(((...((((((((....))))))))(((.....((((((((....)))))))))))...((((((((....)))))))))))" << endl; | ||
768 | - }*/ | ||
769 | } | 762 | } |
770 | 763 | ||
771 | best_ss.sort(); // order the basepairs in the vector | 764 | best_ss.sort(); // order the basepairs in the vector |
... | @@ -1159,7 +1152,6 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct) | ... | @@ -1159,7 +1152,6 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct) |
1159 | } | 1152 | } |
1160 | } | 1153 | } |
1161 | 1154 | ||
1162 | -//Temporaire-------------------------------------- | ||
1163 | 1155 | ||
1164 | //Check if the sequence is an RNA sequence (ATGC) and replace T by U or remove modified nucleotides if necessary | 1156 | //Check if the sequence is an RNA sequence (ATGC) and replace T by U or remove modified nucleotides if necessary |
1165 | string check_motif_sequence(string seq) { | 1157 | string check_motif_sequence(string seq) { |
... | @@ -1184,9 +1176,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { | ... | @@ -1184,9 +1176,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { |
1184 | stack<uint> accolades; | 1176 | stack<uint> accolades; |
1185 | stack<uint> chevrons; | 1177 | stack<uint> chevrons; |
1186 | 1178 | ||
1187 | - /*for(uint j = 0; j < v.size(); j++) { | ||
1188 | - cout << "composante: (" << v[j].pos.first << "," << v[j].pos.second << ")" << endl << endl; | ||
1189 | - }*/ | ||
1190 | uint count = 0; | 1179 | uint count = 0; |
1191 | uint debut = v[count].pos.first; | 1180 | uint debut = v[count].pos.first; |
1192 | uint gap = 0; | 1181 | uint gap = 0; |
... | @@ -1194,12 +1183,10 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { | ... | @@ -1194,12 +1183,10 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { |
1194 | for (uint i = 0; i < struc.size(); i++) { | 1183 | for (uint i = 0; i < struc.size(); i++) { |
1195 | if (struc[i] == '(') { | 1184 | if (struc[i] == '(') { |
1196 | parentheses.push(i + debut + gap - count); | 1185 | parentheses.push(i + debut + gap - count); |
1197 | - //cout << "i: " << i << " pos :" << parentheses.top() << endl; | ||
1198 | 1186 | ||
1199 | } else if (struc[i] == ')') { | 1187 | } else if (struc[i] == ')') { |
1200 | Link l; | 1188 | Link l; |
1201 | l.nts.first = parentheses.top(); | 1189 | l.nts.first = parentheses.top(); |
1202 | - //cout << "top :" << parentheses.top() << endl; | ||
1203 | l.nts.second = i + debut + gap - count; | 1190 | l.nts.second = i + debut + gap - count; |
1204 | vec.push_back(l); | 1191 | vec.push_back(l); |
1205 | parentheses.pop(); | 1192 | parentheses.pop(); |
... | @@ -1237,8 +1224,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { | ... | @@ -1237,8 +1224,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) { |
1237 | } else if (struc[i] == '&') { | 1224 | } else if (struc[i] == '&') { |
1238 | count ++; | 1225 | count ++; |
1239 | gap += v[count].pos.first - v[count - 1].pos.second - 1; | 1226 | gap += v[count].pos.first - v[count - 1].pos.second - 1; |
1240 | - //cout << "count: " << count << endl; | ||
1241 | - //cout << "gap : " << gap << endl; | ||
1242 | } | 1227 | } |
1243 | } | 1228 | } |
1244 | return vec; | 1229 | return vec; |
... | @@ -1311,11 +1296,9 @@ vector<string> find_components(string sequence, string delimiter) { | ... | @@ -1311,11 +1296,9 @@ vector<string> find_components(string sequence, string delimiter) { |
1311 | subseq = seq.substr(0, fin); | 1296 | subseq = seq.substr(0, fin); |
1312 | seq = seq.substr(fin + 1); | 1297 | seq = seq.substr(fin + 1); |
1313 | list.push_back(subseq); // new component sequence | 1298 | list.push_back(subseq); // new component sequence |
1314 | - //std::cout << "subseq: " << subseq << endl; | ||
1315 | } | 1299 | } |
1316 | if (!seq.empty()) { | 1300 | if (!seq.empty()) { |
1317 | list.push_back(seq); | 1301 | list.push_back(seq); |
1318 | - //std::cout << "subseq: " << seq << endl; | ||
1319 | } | 1302 | } |
1320 | return list; | 1303 | return list; |
1321 | } | 1304 | } |
... | @@ -1324,15 +1307,11 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) { | ... | @@ -1324,15 +1307,11 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) { |
1324 | vector<uint> positions; | 1307 | vector<uint> positions; |
1325 | string delimiter = "*"; | 1308 | string delimiter = "*"; |
1326 | uint debut; | 1309 | uint debut; |
1327 | - /*cout << "vsize: " << v.size() << endl; | ||
1328 | - cout << "struc2dsize: " << struc2d.size() << endl;*/ | ||
1329 | for (uint i = 0; i < v.size(); i++) { | 1310 | for (uint i = 0; i < v.size(); i++) { |
1330 | - //cout << "[" << i << "]:" << endl; | ||
1331 | debut = v[i].pos.first; | 1311 | debut = v[i].pos.first; |
1332 | uint pos = struc2d[i].find(delimiter, 0); | 1312 | uint pos = struc2d[i].find(delimiter, 0); |
1333 | while(pos != string::npos && pos <= struc2d[i].size()) | 1313 | while(pos != string::npos && pos <= struc2d[i].size()) |
1334 | { | 1314 | { |
1335 | - //cout << "position: " << pos + debut << endl; | ||
1336 | positions.push_back(pos + debut); | 1315 | positions.push_back(pos + debut); |
1337 | pos = struc2d[i].find(delimiter, pos+1); | 1316 | pos = struc2d[i].find(delimiter, pos+1); |
1338 | } | 1317 | } |
... | @@ -1340,8 +1319,6 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) { | ... | @@ -1340,8 +1319,6 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) { |
1340 | return positions; | 1319 | return positions; |
1341 | } | 1320 | } |
1342 | 1321 | ||
1343 | -//Temporaire-------------------------------------- | ||
1344 | - | ||
1345 | void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pair<uint, char>> errors_id) | 1322 | void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pair<uint, char>> errors_id) |
1346 | { | 1323 | { |
1347 | /* | 1324 | /* |
... | @@ -1373,8 +1350,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai | ... | @@ -1373,8 +1350,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai |
1373 | string keys[5] = {"contacts", "occurences", "pdb", "sequence", "struct2d"}; | 1350 | string keys[5] = {"contacts", "occurences", "pdb", "sequence", "struct2d"}; |
1374 | uint it_errors = 0; | 1351 | uint it_errors = 0; |
1375 | uint comp; | 1352 | uint comp; |
1376 | - //uint max_occ = 0; | ||
1377 | - //uint max_n = 0; | ||
1378 | uint occ = 0; | 1353 | uint occ = 0; |
1379 | 1354 | ||
1380 | for(auto it = js.begin(); it != js.end(); ++it) { | 1355 | for(auto it = js.begin(); it != js.end(); ++it) { |
... | @@ -1385,10 +1360,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai | ... | @@ -1385,10 +1360,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai |
1385 | // Check for known errors to ignore corresponding motifs | 1360 | // Check for known errors to ignore corresponding motifs |
1386 | if (comp == errors_id[it_errors].first) { | 1361 | if (comp == errors_id[it_errors].first) { |
1387 | while (comp == errors_id[it_errors].first) { | 1362 | while (comp == errors_id[it_errors].first) { |
1388 | - //cout << "id erreur: " << errors_id[it_errors].first << " " << errors_id[it_errors].second << endl; | ||
1389 | - /*if (contacts_id.compare("974") == 0) { | ||
1390 | - cout << "id erreur: " << errors_id[it_errors].second << endl; | ||
1391 | - }*/ | ||
1392 | it_errors ++; | 1363 | it_errors ++; |
1393 | } | 1364 | } |
1394 | continue; | 1365 | continue; |
... | @@ -1396,7 +1367,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai | ... | @@ -1396,7 +1367,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai |
1396 | 1367 | ||
1397 | for(auto it2 = js[contacts_id].begin(); it2 != js[contacts_id].end(); ++it2) { | 1368 | for(auto it2 = js[contacts_id].begin(); it2 != js[contacts_id].end(); ++it2) { |
1398 | field = it2.key(); | 1369 | field = it2.key(); |
1399 | - //cout << "field: " << field << endl; | ||
1400 | if (!field.compare(keys[0])) // This is the contacts field | 1370 | if (!field.compare(keys[0])) // This is the contacts field |
1401 | { | 1371 | { |
1402 | contacts = it2.value(); | 1372 | contacts = it2.value(); |
... | @@ -1406,25 +1376,17 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai | ... | @@ -1406,25 +1376,17 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai |
1406 | else if (!field.compare(keys[1])) // This is the occurences field | 1376 | else if (!field.compare(keys[1])) // This is the occurences field |
1407 | { | 1377 | { |
1408 | occ = it2.value(); | 1378 | occ = it2.value(); |
1409 | - //max_occ = find_max_occurrences(filepath); | ||
1410 | tx_occurrences = (double)occ; // / (double)max_occ; | 1379 | tx_occurrences = (double)occ; // / (double)max_occ; |
1411 | - //cout << "occ: " << tx_occurrences << endl; | ||
1412 | 1380 | ||
1413 | } | 1381 | } |
1414 | else if (!field.compare(keys[2])) // This is the pdb field | 1382 | else if (!field.compare(keys[2])) // This is the pdb field |
1415 | { | 1383 | { |
1416 | vector<string> tab = it2.value(); | 1384 | vector<string> tab = it2.value(); |
1417 | pdbs = tab; | 1385 | pdbs = tab; |
1418 | - /*for (uint i = 0; i < pdbs.size(); i++) { | ||
1419 | - cout << "pdbs[" << i << "]: " << pdbs[i] << endl; | ||
1420 | - }*/ | ||
1421 | - | ||
1422 | } | 1386 | } |
1423 | else if (!field.compare(keys[3])) // This is the sequence field | 1387 | else if (!field.compare(keys[3])) // This is the sequence field |
1424 | { | 1388 | { |
1425 | seq = check_motif_sequence(it2.value()); | 1389 | seq = check_motif_sequence(it2.value()); |
1426 | - /*max_n = find_max_sequence(filepath); | ||
1427 | - tx_occurrences = (double)occ / (double)max_n - seq.size() + 1 ;*/ | ||
1428 | component_sequences = find_components(seq, "&"); | 1390 | component_sequences = find_components(seq, "&"); |
1429 | } | 1391 | } |
1430 | else if (!field.compare(keys[4])) // This is the struct2D field | 1392 | else if (!field.compare(keys[4])) // This is the struct2D field |
... | @@ -1440,9 +1402,7 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai | ... | @@ -1440,9 +1402,7 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai |
1440 | 1402 | ||
1441 | Motif temp_motif = Motif(v, contacts_id, nb_contacts, tx_occurrences); | 1403 | Motif temp_motif = Motif(v, contacts_id, nb_contacts, tx_occurrences); |
1442 | temp_motif.links_ = search_pairing(struct2d, v); | 1404 | temp_motif.links_ = search_pairing(struct2d, v); |
1443 | - //cout << "test" << endl; | ||
1444 | temp_motif.pos_contacts = find_contacts(component_contacts, v); | 1405 | temp_motif.pos_contacts = find_contacts(component_contacts, v); |
1445 | - //cout << "test2" << endl; | ||
1446 | 1406 | ||
1447 | // Check if the motif can be inserted, checking the basepairs probabilities and theta | 1407 | // Check if the motif can be inserted, checking the basepairs probabilities and theta |
1448 | bool unprobable = false; | 1408 | bool unprobable = false; | ... | ... |
... | @@ -275,8 +275,7 @@ char Motif::is_valid_RIN(const string& rinfile) | ... | @@ -275,8 +275,7 @@ char Motif::is_valid_RIN(const string& rinfile) |
275 | return (char) 0; | 275 | return (char) 0; |
276 | } | 276 | } |
277 | 277 | ||
278 | -//temporaire--------------------------------------------------- | 278 | +//check that there are as many opening parentheses as closing ones |
279 | - | ||
280 | bool checkSecondaryStructure(string struc) | 279 | bool checkSecondaryStructure(string struc) |
281 | { | 280 | { |
282 | stack<uint> parentheses; | 281 | stack<uint> parentheses; |
... | @@ -332,6 +331,7 @@ bool checkSecondaryStructure(string struc) | ... | @@ -332,6 +331,7 @@ bool checkSecondaryStructure(string struc) |
332 | return (parentheses.empty() && crochets.empty() && accolades.empty() && chevrons.empty()); | 331 | return (parentheses.empty() && crochets.empty() && accolades.empty() && chevrons.empty()); |
333 | } | 332 | } |
334 | 333 | ||
334 | +//count the number of nucleotides in the motif sequence | ||
335 | size_t count_nucleotide(string& seq) { | 335 | size_t count_nucleotide(string& seq) { |
336 | size_t count = 0; | 336 | size_t count = 0; |
337 | for(uint i = 0; i < seq.size(); i++) { | 337 | for(uint i = 0; i < seq.size(); i++) { |
... | @@ -343,6 +343,7 @@ size_t count_nucleotide(string& seq) { | ... | @@ -343,6 +343,7 @@ size_t count_nucleotide(string& seq) { |
343 | return count; | 343 | return count; |
344 | } | 344 | } |
345 | 345 | ||
346 | +//count the number of '&' in the motif sequence | ||
346 | size_t count_delimiter(string& seq) { | 347 | size_t count_delimiter(string& seq) { |
347 | size_t count = 0; | 348 | size_t count = 0; |
348 | for(uint i = 0; i < seq.size(); i++) { | 349 | for(uint i = 0; i < seq.size(); i++) { |
... | @@ -354,7 +355,6 @@ size_t count_delimiter(string& seq) { | ... | @@ -354,7 +355,6 @@ size_t count_delimiter(string& seq) { |
354 | return count; | 355 | return count; |
355 | } | 356 | } |
356 | 357 | ||
357 | -//-------------------------------------------------------------- | ||
358 | vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile) | 358 | vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile) |
359 | { | 359 | { |
360 | // /!\ returns 0 if no errors | 360 | // /!\ returns 0 if no errors |
... | @@ -458,7 +458,6 @@ vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile) | ... | @@ -458,7 +458,6 @@ vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile) |
458 | } | 458 | } |
459 | j++; | 459 | j++; |
460 | } | 460 | } |
461 | - //std::cout << "no error!\n" << endl; | ||
462 | } | 461 | } |
463 | return errors_id; | 462 | return errors_id; |
464 | } | 463 | } |
... | @@ -524,17 +523,9 @@ vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<stri | ... | @@ -524,17 +523,9 @@ vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<stri |
524 | if (regex_search(rna, c)) { | 523 | if (regex_search(rna, c)) { |
525 | if (vc.size() > 2) { | 524 | if (vc.size() > 2) { |
526 | next_seqs = vector<string>(&vc[1], &vc[vc.size()]); | 525 | next_seqs = vector<string>(&vc[1], &vc[vc.size()]); |
527 | - /*for (uint i = 0; i < next_seqs.size(); i++) { | ||
528 | - std::cout << "next seq: " << next_seqs[i] << endl; | ||
529 | - } | ||
530 | - std::cout << endl;*/ | ||
531 | } | 526 | } |
532 | else { | 527 | else { |
533 | next_seqs = vector<string>(1, vc.back()); | 528 | next_seqs = vector<string>(1, vc.back()); |
534 | - /*for (uint i = 0; i < next_seqs.size(); i++) { | ||
535 | - std::cout << "next seq: " << next_seqs[i] << endl; | ||
536 | - } | ||
537 | - std::cout << endl;*/ | ||
538 | } | 529 | } |
539 | uint j = 0; | 530 | uint j = 0; |
540 | // For every regexp match | 531 | // For every regexp match |
... | @@ -606,17 +597,9 @@ vector<vector<Component>> json_find_next_ones_in(string rna, uint offset, vector | ... | @@ -606,17 +597,9 @@ vector<vector<Component>> json_find_next_ones_in(string rna, uint offset, vector |
606 | if (regex_search(rna, c)) { | 597 | if (regex_search(rna, c)) { |
607 | if (vc.size() > 2) { | 598 | if (vc.size() > 2) { |
608 | next_seqs = vector<string>(&vc[1], &vc[vc.size()]); | 599 | next_seqs = vector<string>(&vc[1], &vc[vc.size()]); |
609 | - /*for (uint i = 0; i < next_seqs.size(); i++) { | ||
610 | - std::cout << "next seq: " << next_seqs[i] << endl; | ||
611 | - } | ||
612 | - std::cout << endl;*/ | ||
613 | } | 600 | } |
614 | else { | 601 | else { |
615 | next_seqs = vector<string>(1, vc.back()); | 602 | next_seqs = vector<string>(1, vc.back()); |
616 | - /*for (uint i = 0; i < next_seqs.size(); i++) { | ||
617 | - std::cout << "next seq: " << next_seqs[i] << endl; | ||
618 | - } | ||
619 | - std::cout << endl;*/ | ||
620 | } | 603 | } |
621 | uint j = 0; | 604 | uint j = 0; |
622 | // For every regexp match | 605 | // For every regexp match | ... | ... |
cppsrc/Scripts/Add_pdb.cpp
deleted
100644 → 0
1 | -#include <iostream> | ||
2 | -#include <sstream> | ||
3 | -#include <fstream> | ||
4 | -#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp" | ||
5 | -#include <typeinfo> | ||
6 | -#include <set> | ||
7 | -#include <algorithm> | ||
8 | -#include <cstdio> | ||
9 | -#include <vector> | ||
10 | - | ||
11 | -using namespace std; | ||
12 | -using json = nlohmann::json; | ||
13 | - | ||
14 | -//Concatenate the motives from jsonmotifs by adding the corresponding pdb from jsondssr | ||
15 | -void add_pdb(const string& jsonmotifs, const string& jsondssr, const string& jsonoutfile) { | ||
16 | - std::ifstream lib(jsonmotifs); | ||
17 | - std::ifstream lib2(jsondssr); | ||
18 | - | ||
19 | - std::ofstream outfile (jsonoutfile); | ||
20 | - json new_motif; | ||
21 | - json new_id; | ||
22 | - json js = json::parse(lib); | ||
23 | - json js2 = json::parse(lib2); | ||
24 | - | ||
25 | - for (auto it = js.begin(); it != js.end(); ++it) { | ||
26 | - string id = it.key(); | ||
27 | - | ||
28 | - string sequence, structure; | ||
29 | - vector<string> list_pdbs; | ||
30 | - vector<string> list_pdbs2; | ||
31 | - bool is_added = true; | ||
32 | - | ||
33 | - //cout << "id: " << id << endl; | ||
34 | - for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { | ||
35 | - string test = it2.key(); | ||
36 | - | ||
37 | - if (!test.compare("sequence")) { | ||
38 | - sequence = it2.value(); | ||
39 | - new_id[test] = it2.value(); | ||
40 | - | ||
41 | - } else if (!test.compare("struct2d")) { | ||
42 | - structure = it2.value(); | ||
43 | - new_id[test] = it2.value(); | ||
44 | - | ||
45 | - } else { | ||
46 | - new_id[test] = it2.value(); | ||
47 | - } | ||
48 | - } | ||
49 | - //cout << "-------begin---------" << endl; | ||
50 | - | ||
51 | - for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) { | ||
52 | - string id2 = it3.key(); | ||
53 | - string sequence2, structure2; | ||
54 | - | ||
55 | - //cout << "id: " << id << " / id2: " << id2 << endl; | ||
56 | - for (auto it4 = js2[id2].begin(); it4 != js2[id2].end(); ++it4) { | ||
57 | - string chain = it4.key(); | ||
58 | - | ||
59 | - for (auto it5 = js2[id2][chain].begin(); it5 != js2[id2][chain].end(); ++it5) { | ||
60 | - string test = it5.key(); | ||
61 | - | ||
62 | - if (!test.compare("sequence")) { | ||
63 | - sequence2 = it5.value(); | ||
64 | - //cout << sequence2 << endl; | ||
65 | - if (!sequence.compare(sequence2) && !structure.compare(structure2)) { | ||
66 | - //cout << id2 << endl; | ||
67 | - vector<string> tmp; | ||
68 | - tmp.push_back(id2); | ||
69 | - new_id["pdb"] = tmp; | ||
70 | - } | ||
71 | - | ||
72 | - } else if (!test.compare("2D ")) { | ||
73 | - structure2 = it5.value(); | ||
74 | - //cout << structure2 << endl; | ||
75 | - } | ||
76 | - } | ||
77 | - } | ||
78 | - //cout << endl;*/ | ||
79 | - } | ||
80 | - | ||
81 | - | ||
82 | - /*for(uint ii = 0; ii < list_pfams.size(); ii++) { | ||
83 | - for (uint jj = 0; jj < list_pfams[ii].size(); jj++) { | ||
84 | - cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl; | ||
85 | - } | ||
86 | - }*/ | ||
87 | - new_motif[id] = new_id; | ||
88 | - new_id.clear(); | ||
89 | - //cout << "valeur: " << ite << endl; | ||
90 | - /*for (uint i = 0; i < tab_struc.size() ; i++) { | ||
91 | - cout << "tab_struc[" << i << "]: " << tab_struc[i] << endl << endl; | ||
92 | - } */ | ||
93 | - } | ||
94 | - outfile << new_motif.dump(4) << endl; | ||
95 | - outfile.close(); | ||
96 | -} | ||
97 | - | ||
98 | -int main() | ||
99 | -{ | ||
100 | - string jsonmotifs = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_beta.json"; | ||
101 | - string jsondssr = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/dssr2.json"; | ||
102 | - string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_fusion_beta.json"; | ||
103 | - add_pdb(jsonmotifs, jsondssr, out); | ||
104 | - return 0; | ||
105 | -} | ||
106 | - |
cppsrc/Scripts/addDelimiter
deleted
100644 → 0
No preview for this file type
cppsrc/Scripts/addPdb
deleted
100644 → 0
No preview for this file type
cppsrc/Scripts/countPattern
deleted
100644 → 0
No preview for this file type
cppsrc/Scripts/count_pattern.cpp
deleted
100644 → 0
This diff is collapsed. Click to expand it.
cppsrc/Scripts/create
deleted
100644 → 0
No preview for this file type
cppsrc/Scripts/create_benchmark.cpp
deleted
100644 → 0
1 | -#include <iostream> | ||
2 | -#include <sstream> | ||
3 | -#include <fstream> | ||
4 | -#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp" | ||
5 | -#include <typeinfo> | ||
6 | -#include <set> | ||
7 | -#include <algorithm> | ||
8 | -#include <cstdio> | ||
9 | -#include <vector> | ||
10 | - | ||
11 | -using namespace std; | ||
12 | -using json = nlohmann::json; | ||
13 | -/* | ||
14 | -vector<string> find_components(string sequence, string delimiter) { | ||
15 | - vector<string> list; | ||
16 | - string seq = sequence; | ||
17 | - string subseq; | ||
18 | - uint fin = 0; | ||
19 | - | ||
20 | - while(seq.find(delimiter) != string::npos) { | ||
21 | - fin = seq.find(delimiter); | ||
22 | - | ||
23 | - subseq = seq.substr(0, fin); | ||
24 | - seq = seq.substr(fin + 1); | ||
25 | - list.push_back(subseq); // new component sequence | ||
26 | - //std::cout << "subseq: " << subseq << endl; | ||
27 | - } | ||
28 | - if (!seq.empty()) { | ||
29 | - list.push_back(seq); | ||
30 | - //std::cout << "subseq: " << seq << endl; | ||
31 | - } | ||
32 | - return list; | ||
33 | -} | ||
34 | - | ||
35 | -string is_include(vector<string>& components, string sequence, vector<string>& contacts) { | ||
36 | - | ||
37 | - string seq_contact = ""; | ||
38 | - vector<uint> positions; | ||
39 | - uint count = 0; | ||
40 | - uint debut = 0; | ||
41 | - string str = components[0]; | ||
42 | - | ||
43 | - uint pos = sequence.find(str, 0); | ||
44 | - debut = pos + components[0].size(); | ||
45 | - | ||
46 | - if (pos == 0) { | ||
47 | - seq_contact += contacts[0]; | ||
48 | - } else if (pos <= sequence.size()) { | ||
49 | - string gap = ""; | ||
50 | - for (uint i = 0; i < pos; i++) { | ||
51 | - gap += "."; | ||
52 | - } | ||
53 | - seq_contact += gap + contacts[0]; | ||
54 | - } | ||
55 | - while(pos <= sequence.size() && count < components.size() - 1) | ||
56 | - { | ||
57 | - string gap = ""; | ||
58 | - debut = pos + components[count].size(); | ||
59 | - count++; | ||
60 | - str = components[count]; | ||
61 | - pos = sequence.find(str, pos + components[count-1].size()); | ||
62 | - | ||
63 | - for (uint i = debut; i < pos; i++) { | ||
64 | - gap += "."; | ||
65 | - } | ||
66 | - seq_contact += gap + contacts[count]; | ||
67 | - | ||
68 | - } | ||
69 | - if (count == components.size() - 1) { | ||
70 | - string gap = ""; | ||
71 | - if (seq_contact.size() != sequence.size()) { | ||
72 | - for (uint i = 0; i < sequence.size() - seq_contact.size(); i++) { | ||
73 | - gap += "."; | ||
74 | - } | ||
75 | - } | ||
76 | - seq_contact += gap; | ||
77 | - return seq_contact; | ||
78 | - } | ||
79 | - return std::string(); | ||
80 | -}*/ | ||
81 | -/* | ||
82 | -//Concatenate the contact field to the motives of the benchmark (which is obtained from the motives library) | ||
83 | -string add_contact(const string& jsonbm, const string& jsonmotifs) { | ||
84 | - std::ifstream lib(jsonbm); | ||
85 | - std::ifstream lib2(jsonmotifs); | ||
86 | - string bm2 = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.json"; | ||
87 | - std::ofstream outfile (bm2); | ||
88 | - json new_motif; | ||
89 | - json new_id; | ||
90 | - json js = json::parse(lib); | ||
91 | - json js2 = json::parse(lib2); | ||
92 | - | ||
93 | - for (auto it = js.begin(); it != js.end(); ++it) { | ||
94 | - string id = it.key(); | ||
95 | - string seq_bm; | ||
96 | - string seq_contact; | ||
97 | - | ||
98 | - for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { | ||
99 | - string test = it2.key(); | ||
100 | - //cout << "test: " << it2.key() << endl; | ||
101 | - if (!test.compare("seq")) { | ||
102 | - seq_bm = it2.value(); | ||
103 | - new_id[test] = it2.value(); | ||
104 | - } else { | ||
105 | - new_id[test] = it2.value(); | ||
106 | - } | ||
107 | - } | ||
108 | - //cout << "-------begin---------" << endl; | ||
109 | - | ||
110 | - for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) { | ||
111 | - string id2 = it3.key(); | ||
112 | - vector<string> comp; | ||
113 | - vector<string> strucs; | ||
114 | - vector<string> list_pdbs; | ||
115 | - bool flag = false; | ||
116 | - | ||
117 | - //cout << "id: " << id << " / id2: " << id2 << endl; | ||
118 | - for (auto it4 = js2[id2].begin(); it4 != js2[id2].end(); ++it4) { | ||
119 | - string test = it4.key(); | ||
120 | - | ||
121 | - if (!test.compare("sequence")) { | ||
122 | - string sequence = it4.value(); | ||
123 | - comp = find_components(sequence, "&"); | ||
124 | - //cout << id << " / " << id2 << endl; | ||
125 | - } else if (!test.compare("contacts")) { | ||
126 | - string struc2d = it4.value(); | ||
127 | - strucs = find_components(struc2d, "&"); | ||
128 | - | ||
129 | - } else if (!test.compare("pdb")) { | ||
130 | - vector<string> tab = it4.value(); | ||
131 | - list_pdbs = tab; | ||
132 | - if (find(list_pdbs.begin(), list_pdbs.end(), id) != list_pdbs.end()) { | ||
133 | - flag = true; | ||
134 | - } | ||
135 | - } | ||
136 | - } | ||
137 | - if (flag) { | ||
138 | - seq_contact = is_include(comp, seq_bm, strucs); | ||
139 | - //cout << "id: " << id << " id2: " << id2 << " seq_contact: " << seq_contact << endl; | ||
140 | - new_id["ctc"] = seq_contact; | ||
141 | - } | ||
142 | - | ||
143 | - } | ||
144 | - | ||
145 | - new_motif[id] = new_id; | ||
146 | - new_id.clear(); | ||
147 | - | ||
148 | - } | ||
149 | - outfile << new_motif.dump(4) << endl; | ||
150 | - outfile.close(); | ||
151 | - return bm2; | ||
152 | -}*/ | ||
153 | - | ||
154 | -void create_benchmark(const string& jsonmotifs) { | ||
155 | - std::ifstream lib(jsonmotifs); | ||
156 | - string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/"; | ||
157 | - string list = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"; | ||
158 | - string dbn = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; | ||
159 | - std::ofstream outlist (list); | ||
160 | - std::ofstream outdbn (dbn); | ||
161 | - json js = json::parse(lib); | ||
162 | - uint count = 0; | ||
163 | - | ||
164 | - for (auto it = js.begin(); it != js.end(); ++it) { | ||
165 | - string id = it.key(); | ||
166 | - string name, seq, contacts, structure; | ||
167 | - for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { | ||
168 | - string chain = it2.key(); | ||
169 | - if (chain.compare("pfams") != 0) { | ||
170 | - string name = id + "_" + chain; | ||
171 | - string filename = fasta + name + ".fa"; | ||
172 | - std::ofstream outfasta (filename); | ||
173 | - outfasta << ">test_" << name << endl; | ||
174 | - for (auto it3 = js[id][chain].begin(); it3 != js[id][chain].end(); ++it3) { | ||
175 | - string field = it3.key(); | ||
176 | - if (!field.compare("sequence")) { | ||
177 | - seq = it3.value(); | ||
178 | - outfasta << seq.substr(0,seq.size()) << endl; | ||
179 | - outfasta.close(); | ||
180 | - | ||
181 | - } else if (!field.compare("contacts")) { | ||
182 | - contacts = it3.value(); | ||
183 | - | ||
184 | - } else if (!field.compare("struct2d")) { | ||
185 | - structure = it3.value(); | ||
186 | - } | ||
187 | - } | ||
188 | - if(seq.find('&') == string::npos) { | ||
189 | - outlist << ">test_" << name << endl; | ||
190 | - outdbn << "test_" << name << "." << endl; | ||
191 | - outlist << contacts << endl; | ||
192 | - outdbn << seq << endl; | ||
193 | - outdbn << structure << endl; | ||
194 | - outdbn << contacts << endl; | ||
195 | - outlist << seq << endl; | ||
196 | - outlist << structure << endl; | ||
197 | - count++; | ||
198 | - } | ||
199 | - } | ||
200 | - } | ||
201 | - } | ||
202 | - cout << count << " sequences en tout" << endl; | ||
203 | - lib.close(); | ||
204 | - outlist.close(); | ||
205 | - outdbn.close(); | ||
206 | -} | ||
207 | - | ||
208 | -int main() | ||
209 | -{ | ||
210 | - string path = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/"; | ||
211 | - //string jsonmotifs = path + "modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json"; | ||
212 | - string jsonbm = path + "modules/ISAURE/Motifs_version_initiale/benchmark_16-07-2021.json"; | ||
213 | - | ||
214 | - | ||
215 | - //string jsonbm2 = add_contact(jsonbm1, jsonmotifs); | ||
216 | - create_benchmark(jsonbm); | ||
217 | - | ||
218 | - return 0; | ||
219 | -} | ||
220 | - |
cppsrc/Scripts/deletePdb
deleted
100644 → 0
No preview for this file type
cppsrc/Scripts/select
deleted
100644 → 0
No preview for this file type
data/fasta/benchmark.fa
deleted
100755 → 0
1 | ->test_1JJ2 | ||
2 | -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCGGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAACCCGGUUCGCCGCCACC | ||
3 | ->test_1L9A | ||
4 | -GACACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAUUUGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGUC | ||
5 | ->test_1LNG | ||
6 | -UCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUC | ||
7 | ->test_1MFQ | ||
8 | -GACACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGUC | ||
9 | ->test_1SM1 | ||
10 | -CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG | ||
11 | ->test_1U6P | ||
12 | -GGCGGUACUAGUUGAGAAACUAGCUCUGUAUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU | ||
13 | ->test_1Y69 | ||
14 | -CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG | ||
15 | ->test_1YHQ | ||
16 | -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC | ||
17 | ->test_1YI2 | ||
18 | -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC | ||
19 | ->test_2V3C | ||
20 | -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG | ||
21 | ->test_2ZJQ | ||
22 | -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU | ||
23 | ->test_2ZJR | ||
24 | -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU | ||
25 | ->test_3ADB | ||
26 | -GGCCGCCGCCACCGGGGUGGUCCCCGGGCCGGACUUCAGAUCCGGCGCGCCCCGAGUGGGGCGCGGGGUUCAAUUCCCCGCGGCGGCCGCCA | ||
27 | ->test_3CUL | ||
28 | -GGAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA | ||
29 | ->test_3CUN | ||
30 | -GAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA | ||
31 | ->test_3DLL | ||
32 | -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU | ||
33 | ->test_3HHN | ||
34 | -UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUACCCAUUGCACUCCGGGUAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA | ||
35 | ->test_3IVKA | ||
36 | -UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA | ||
37 | ->test_3IWN | ||
38 | -CACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACGGCAUUGCACUCCGCCGUAGGUAGCGGGGUUACCGAUGG | ||
39 | ->test_3KTW | ||
40 | -AGAUAGUCGUGGGUUCCCUUUCUGGAGGGAGAGGGAAUUCCACGUUGACCGGGGGAACCGGCCAGGCCCGGAAGGGAGCAACCGUGCCCGGCUAU | ||
41 | ->test_3MUM | ||
42 | -GUCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG | ||
43 | ->test_3MUR | ||
44 | -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG | ||
45 | ->test_3NDB | ||
46 | -GUCUCGUCCCGUGGGGCUCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCGGCGCUCACGGGGGUGCGGGAC | ||
47 | ->test_3PIO | ||
48 | -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU | ||
49 | ->test_3PIP | ||
50 | -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU | ||
51 | ->test_3UCU | ||
52 | -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG | ||
53 | ->test_3UD4 | ||
54 | -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG | ||
55 | ->test_3V7E | ||
56 | -GGCUUAUCAAGAGAGGUGGAGGGACUGGCCCGAUGAAACCCGGCAACCACUAGUCUAGCGUCAGCUUCGGCUGACGCUAGGCUAGUGGUGCCAAUUCCUGCAGCGGAAACGUUGAAAGAUGAGCCA | ||
57 | ->test_3W3S | ||
58 | -GGGAGAGGUUGGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUCCCUCUCCCGCC | ||
59 | ->test_4IO9 | ||
60 | -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU | ||
61 | ->test_4IOA | ||
62 | -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU | ||
63 | ->test_4LCK | ||
64 | -GGGUGCGAUGAGAAGAAGAGUAUUAAGGAUUUACUAUGAUUAGCGACUCUAGGAUAGUGAAAGCUAGAGGAUAGUAACCUUAAGAAGGCACUUCGAGCACCC | ||
65 | ->test_4P3EA | ||
66 | -GACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUU | ||
67 | ->test_4P3EB | ||
68 | -GACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUU | ||
69 | ->test_4UYJ | ||
70 | -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGUAAGGGUUCCCACCCUCGGGCGUGCCUC | ||
71 | ->test_4UYK | ||
72 | -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGAAGCCGCCUCUGUAAGGAGGCGGUGGAGGGUUCCCACCCUCGGGCGUGCCUC | ||
73 | ->test_4W90 | ||
74 | -GCGCGCUUAAUCUGAAAUCAGAGCGGGGGACCCAUUGCACUCCGGGUUUUUCCCGUAAGGGGUGAAUCCUUUUUAGGUAGGGCGAAAGCCCGAAUCCGUCAGCUAACCUCGUAAGCGCGC | ||
75 | ->test_4WF9 | ||
76 | -UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC | ||
77 | ->test_4XCO | ||
78 | -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG | ||
79 | ->test_4YB1 | ||
80 | -GGGCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACCUCGGUAGGUAGCGGGGUUACCGAUG | ||
81 | ->test_5DM7 | ||
82 | -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU | ||
83 | ->test_5JVGA | ||
84 | -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU | ||
85 | ->test_5M73 | ||
86 | -GGUGUCCGCACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGGGAUCGCGCCUA | ||
87 | ->test_5NRGA | ||
88 | -UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC |
data/fasta/benchmark2.fa
deleted
100755 → 0
This diff is collapsed. Click to expand it.
data/fasta/motif_test.fa
deleted
100755 → 0
data/fasta/test.fa
deleted
100755 → 0
data/fasta/tests.fa
deleted
100755 → 0
data/fasta/tests_gros_motifs.fa
deleted
100755 → 0
1 | ->test_927 | ||
2 | -CCCGUGCCCAUAGAGGACCACCCCAUCCAUGCCGAGCUGGGUUUGUGAAACACGCCAAUGAUCCGGACCGCAGGGUCCCAAGUCGGUCAGCGCGGG | ||
3 | ->test_170 | ||
4 | -GCGGGAGUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCCUGUUCGAAUCAGCCCCCGCACCA | ||
5 | ->test_768 | ||
6 | -CUAGUCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGUUCGAAGCCGAGGUGUUCUUCCCCCUCGCCGUG | ||
7 | ->test_770 | ||
8 | -CUAGACGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGCCGAAGUUUCGAGGGUUUUUCUCUCGCCGUG | ||
9 | ->test_266 | ||
10 | -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC | ||
11 | ->test_267 | ||
12 | -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC | ||
13 | ->test_766 | ||
14 | -AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG | ||
15 | ->test_851 | ||
16 | -AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG | ||
17 | ->test_948 | ||
18 | -CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG | ||
19 | ->test_972 | ||
20 | -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG |
data/fasta/tests_gros_motifs2.fa
deleted
100755 → 0
1 | ->test_159 | ||
2 | -UUAAGGCGGCCACAGCAGUUGUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACGGCGUUCCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA | ||
3 | ->test_122 | ||
4 | -GGCGGCCACAGCGGUUUGAUCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCGGGCGUUCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA | ||
5 | ->test_264 | ||
6 | -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC | ||
7 | ->test_265 | ||
8 | -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC | ||
9 | ->test_109 | ||
10 | -GGCGGCCACAGCGGUAAGCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCAGCGUUCUUCUGGAGUGCGCGAGCCUCUGGAGUUCGCCGCCA | ||
11 | ->test_968 | ||
12 | -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
13 | ->test_962 | ||
14 | -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
15 | ->test_62 | ||
16 | -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC | ||
17 | - |
data/fasta/tests_gros_motifs3.fa
deleted
100755 → 0
1 | ->test_1010 | ||
2 | -GAUGAGACGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGU | ||
3 | ->test_1018 | ||
4 | -GGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUCCCCAC | ||
5 | ->test_1028 | ||
6 | -GCCCGGAUAGCUCAGUCGGUAGAGCAUCAGACUUUUAAUCUGAGGGUCCAGGGUUCAAGUCCCUGUUCGGGC | ||
7 | ->test_1034 | ||
8 | -UAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGU | ||
9 | ->test_1035 | ||
10 | -GCCUAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGUCGAGGC | ||
11 | ->test_147 | ||
12 | -AUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU | ||
13 | ->test_72 | ||
14 | -GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA | ||
15 | ->test_968 | ||
16 | -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
17 | ->test_962 | ||
18 | -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
19 | ->test_62 | ||
20 | -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC | ||
21 | - |
1 | ->test_927 | ||
2 | -CCCGUGCCCAUAGAGGACCACCCCAUCCAUGCCGAGCUGGGUUUGUGAAACACGCCAAUGAUCCGGACCGCAGGGUCCCAAGUCGGUCAGCGCGGG | ||
3 | -.((((((.....(&(.&.(((((((&.(.....).&.)))).&&)))...)&).((&.....&.(((((....))))).&....))...)))))). | ||
4 | ->test_170 | ||
5 | -GCGGGAGUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCCUGUUCGAAUCAGCCCCCGCACCA | ||
6 | -(((((&(..(((.......[.....))).(((((.......))))).(((....)))...(&(..]....)&))&))))).... | ||
7 | ->test_768 | ||
8 | -CUAGUCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGUUCGAAGCCGAGGUGUUCUUCCCCCUCGCCGUG | ||
9 | -(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&&)...(&(((((&(..&..)&&)))))&)..) | ||
10 | ->test_770 | ||
11 | -CUAGACGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGCCGAAGUUUCGAGGGUUUUUCUCUCGCCGUG | ||
12 | -(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&&&((((&(..&..)&))))&)..) | ||
13 | ->test_266 | ||
14 | -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC | ||
15 | -................................................................................................... | ||
16 | ->test_267 | ||
17 | -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC | ||
18 | -................................................................................................... | ||
19 | ->test_766 | ||
20 | -AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG | ||
21 | -.............................................((((((....)))))) | ||
22 | ->test_851 | ||
23 | -AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG | ||
24 | -.............................................((((((....)))))) | ||
25 | ->test_948 | ||
26 | -CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG | ||
27 | -...........................(((((.....))))) | ||
28 | ->test_972 | ||
29 | -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
30 | -......(((((........))))..)................ | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | ->test_159 | ||
2 | -UUAAGGCGGCCACAGCAGUUGUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACGGCGUUCCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA | ||
3 | -..&&((((((....((&((&(&...((((((...(.....)...))))..))....)&))&))...(&&((((((.((....))))))))&)...)))))). | ||
4 | ->test_122 | ||
5 | -GGCGGCCACAGCGGUUUGAUCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCGGGCGUUCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA | ||
6 | -((((((....(((((&&(&.&..((((((...(.....)...))))..))....)&)))&&))...(&((((((.((....))))))))&)...)))))). | ||
7 | ->test_264 | ||
8 | -CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC | ||
9 | -................................................................................................... | ||
10 | ->test_265 | ||
11 | -CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC | ||
12 | -................................................................................................... | ||
13 | ->test_109 | ||
14 | -GGCGGCCACAGCGGUAAGCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCAGCGUUCUUCUGGAGUGCGCGAGCCUCUGGAGUUCGCCGCCA | ||
15 | -((((((....(((((&&(&..((((((...(.....)...))))..))....)&)))&))...(&&((((((.((....))))))))&)...)))))). | ||
16 | ->test_968 | ||
17 | -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
18 | -......(((((........))))..)................ | ||
19 | ->test_962 | ||
20 | -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
21 | -.....(((((........))))..)................ | ||
22 | ->test_62 | ||
23 | -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC | ||
24 | -(((((((((.((((((....))))..))))))))))) | ||
25 | - | ||
26 | - | ||
27 | - | ||
28 | - |
1 | ->test_1010 | ||
2 | -GAUGAGACGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGU | ||
3 | -..........((((((..((((....))))....))))))..(((..).)).......((((....)))).. | ||
4 | ->test_1018 | ||
5 | -GGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUCCCCAC | ||
6 | -(((((..((((.....[..)))).(((((.......))))).....(((((..]....)))))))))).... | ||
7 | ->test_1028 | ||
8 | -GCCCGGAUAGCUCAGUCGGUAGAGCAUCAGACUUUUAAUCUGAGGGUCCAGGGUUCAAGUCCCUGUUCGGGC | ||
9 | -(((((((..((((........)))).(((((.(...).))))).....(((((.......)))))))))))) | ||
10 | ->test_1034 | ||
11 | -UAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGU | ||
12 | -.......(((((.(((..(((.........)))..))).....(...((......)).).))))) | ||
13 | ->test_1035 | ||
14 | -GCCUAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGUCGAGGC | ||
15 | -(((.......(((((.(((..(((.........)))..))).....(...((......)).).)))))...))) | ||
16 | ->test_147 | ||
17 | -AUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU | ||
18 | -.....((((..(((.(((((((((....)))))....)))))))))))((((((((((....)))))))))) | ||
19 | ->test_72 | ||
20 | -GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA | ||
21 | -(((((((................(((..((((.......))))...)))(((((.......)))))))))))).... | ||
22 | ->test_968 | ||
23 | -GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
24 | -......(((((........))))..)................ | ||
25 | ->test_962 | ||
26 | -AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG | ||
27 | -.....(((((........))))..)................ | ||
28 | ->test_62 | ||
29 | -GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC | ||
30 | -(((((((((.((((((....))))..))))))))))) | ||
31 | - | ||
32 | - |
This diff is collapsed. Click to expand it.
1 | ->test_1JJ2 | ||
2 | -...************.**.....*.*******.****..***.****************.......****.............*****..***...*****............*******.. | ||
3 | -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCGGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAACCCGGUUCGCCGCCACC | ||
4 | -...((((((....((((((((......((((((...(.....)...))))..))....)))))).))...(((((.....((((((.((....))))))))....)))))...))))))... | ||
5 | ->test_1LNG | ||
6 | -................************.....................................*****....***.................... | ||
7 | -UCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUC | ||
8 | -..(.((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).)).) | ||
9 | ->test_1U6P | ||
10 | -.............................*****..............................................**..................* | ||
11 | -GGCGGUACUAGUUGAGAAACUAGCUCUGUAUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU | ||
12 | -.((((..((((((....))))))..)))).....((((..(((.(((((((((....)))))....)))))))))))((((((((((....)))))))))) | ||
13 | ->test_1Y69 | ||
14 | -.........***................................................................**.........****........................... | ||
15 | -CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG | ||
16 | -(((((((((.....((.(((((....((((((...............)))..)))...)))))..))(((.......((.(((((....))))).)).......)))..))))))))) | ||
17 | ->test_1YHQ | ||
18 | -...***************......********.****..***.****************.......****............******..***...****.............*******.. | ||
19 | -UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC | ||
20 | -...((((((....((((((((......((((((...(.....)...))))..))....)))))).))...((.((.....((((((.((....))))))))....)).))...))))))... | ||
21 | ->test_2V3C | ||
22 | -..............************...........******.****.....**....*********...**********........***.... | ||
23 | -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG | ||
24 | -((((..(((((.(((((((((....)))))))))..))))).....(((((.....(((.....(((....))).....)))..)))))..)))). | ||
25 | ->test_2ZJQ | ||
26 | -......****.**..............********..**.******.******.****..*............*******..***.....******.......*****.....******... | ||
27 | -CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU | ||
28 | -.((((((((((.....((.(((((....(((((((...(.....)...))))..)))...)))))..))(((.......((.(((((....))))).)).......)))..)))))))))). | ||
29 | ->test_3ADB | ||
30 | -*............********.****...............****...*.....................**.**.....*..**....*** | ||
31 | -GGCCGCCGCCACCGGGGUGGUCCCCGGGCCGGACUUCAGAUCCGGCGCGCCCCGAGUGGGGCGCGGGGUUCAAUUCCCCGCGGCGGCCGCCA | ||
32 | -(((((((((..((((((..[.))))))((((((.......))))))(((((((....)))))))((((..]....))))))))))))).... | ||
33 | ->test_3CUL | ||
34 | -.............................********.**................................................*... | ||
35 | -GGAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA | ||
36 | -(((((((....)))))).)(((..(((((..........)))))....)))...(((.(((((((((((.......))))))))))).))). | ||
37 | ->test_3HHN | ||
38 | -...............................................................**...********.**.......................................................... | ||
39 | -UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUACCCAUUGCACUCCGGGUAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA | ||
40 | -((((((((...[[[[[[.))))))))...............[[[[[(...).(.((((((((((((((..........)))))))..((((.]]]]]))))((.((((......)))).)))))))))).]]]]]]. | ||
41 | ->test_3IWN | ||
42 | -....................................................************.*******..................... | ||
43 | -CACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACGGCAUUGCACUCCGCCGUAGGUAGCGGGGUUACCGAUGG | ||
44 | -((((......((...(((.((....)).)))..[))...(((.((.(((((..((((..........))))))))).].)))))...)).)). | ||
45 | ->test_3KTW | ||
46 | -...............*************............................*.........*****...****................. | ||
47 | -AGAUAGUCGUGGGUUCCCUUUCUGGAGGGAGAGGGAAUUCCACGUUGACCGGGGGAACCGGCCAGGCCCGGAAGGGAGCAACCGUGCCCGGCUAU | ||
48 | -..(((..((.(((((((((((((....))))))))))).)).))....(.(((.....(((.....(((....))).....)))..))).).))) | ||
49 | ->test_3MUM | ||
50 | -....................................................***..*******..**....................... | ||
51 | -GUCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG | ||
52 | -..(.((......((...((((((....))))))..[))...(((.((((((((..((..........))))))).]))))))...))...) | ||
53 | ->test_3MUR | ||
54 | -....................................................****.********.**....................... | ||
55 | -GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG | ||
56 | -..(.((......((...((((((....))))))..[))...(((.((((((((...(..........).))))).]))))))...))...) | ||
57 | ->test_3NDB | ||
58 | -.................................*************..........................**....**********..*********..................................... | ||
59 | -GUCUCGUCCCGUGGGGCUCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCGGCGCUCACGGGGGUGCGGGAC | ||
60 | -((((((..(((((.(((.(((((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).))))))).).))))).....)))))) | ||
61 | ->test_3PIO | ||
62 | -.....****.***.............********..**.******.*****..****..*............*******..***....*****.*.......****......******.. | ||
63 | -ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU | ||
64 | -((((((((((.....((.(((((....((((((...............)))..)))...)))))..)).((.......((.(((((....))))).)).......))...)))))))))) | ||
65 | ->test_3V7E | ||
66 | -........*........**............****...................................................*......................................* | ||
67 | -GGCUUAUCAAGAGAGGUGGAGGGACUGGCCCGAUGAAACCCGGCAACCACUAGUCUAGCGUCAGCUUCGGCUGACGCUAGGCUAGUGGUGCCAAUUCCUGCAGCGGAAACGUUGAAAGAUGAGCCA | ||
68 | -((((((((....(.(((...(((.[.[[)))......))))(((..(((((((((((((((((.(....).))))))))))))))))).)))...(]].](((((....)))))..))))))))). | ||
69 | ->test_3W3S | ||
70 | -...................**............................*...........................*.................... | ||
71 | -GGGAGAGGUUGGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUCCCUCUCCCGCC | ||
72 | -((((((((..((.((((....))))))((((((.......))))).)((((.((((....)))).)))).(((((.......)))))))))))))... | ||
73 | ->test_4UYJ | ||
74 | -......*.............************.....................**...........*****.................***................... | ||
75 | -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGUAAGGGUUCCCACCCUCGGGCGUGCCUC | ||
76 | -(((((..(((((((((...[[[[[.)))))....((((....]]]]].....))))))))...(((((((((..((..(((....)))..))..)))))))))..))))) | ||
77 | ->test_4UYK | ||
78 | -......*.............************.....................**...........****..........................................***................... | ||
79 | -GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGAAGCCGCCUCUGUAAGGAGGCGGUGGAGGGUUCCCACCCUCGGGCGUGCCUC | ||
80 | -(((((..(((((((((...[[[[[.)))))....((((....]]]]].....))))))))...(((((((((..((..(((...(((((((((....)))))))))...)))..))..)))))))))..))))) | ||
81 | ->test_4WF9 | ||
82 | -...****..**.............*****.....*...***...******.****.*.............*****.*..***....**..***.....***.......****.. | ||
83 | -UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC | ||
84 | -.(..(..(.....((((.((......((((((...(.....)...))))..)).....)).)).))............(............)..............)..)..). | ||
85 | ->test_4XCO | ||
86 | -..............*************........**........................*******....********................ | ||
87 | -GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG | ||
88 | -((((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).)))). | ||
89 | ->test_4YB1 | ||
90 | -****.............................................................................*......... | ||
91 | -GGGCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACCUCGGUAGGUAGCGGGGUUACCGAUG | ||
92 | -...((((......((...((((((....))))))..[))...(((.(((((((...((..........)).)))).]))))))...)).)) |
1 | -{ | ||
2 | - "1": { | ||
3 | - "occurences": 1, | ||
4 | - "pdb": [ | ||
5 | - "1A1T" | ||
6 | - ], | ||
7 | - "sequence": "GGACUAGCGGAGGCUAGUCC", | ||
8 | - "struct2d": "((((((((....))))))))" | ||
9 | - }, | ||
10 | - "10": { | ||
11 | - "occurences": 1, | ||
12 | - "pdb": [ | ||
13 | - "1AUD" | ||
14 | - ], | ||
15 | - "sequence": "GGCAGAGUCCUUCGGGACAUUGCACCUG", | ||
16 | - "struct2d": "(.(((.((((....)))).......)))" | ||
17 | - }, | ||
18 | - "100": { | ||
19 | - "occurences": 1, | ||
20 | - "pdb": [ | ||
21 | - "1N38" | ||
22 | - ], | ||
23 | - "sequence": "UUAGC", | ||
24 | - "struct2d": "...))" | ||
25 | - }, | ||
26 | - "1000": { | ||
27 | - "occurences": 1, | ||
28 | - "pdb": [ | ||
29 | - "4Z4C" | ||
30 | - ], | ||
31 | - "sequence": "CAAUGUGAC", | ||
32 | - "struct2d": "))))))))." | ||
33 | - }, | ||
34 | - "1001": { | ||
35 | - "occurences": 1, | ||
36 | - "pdb": [ | ||
37 | - "4Z4D" | ||
38 | - ], | ||
39 | - "sequence": "UUCACAUUGCCCAAGUCU&U", | ||
40 | - "struct2d": ".((((((((.........&." | ||
41 | - }, | ||
42 | - "1002": { | ||
43 | - "occurences": 1, | ||
44 | - "pdb": [ | ||
45 | - "4Z4I" | ||
46 | - ], | ||
47 | - "sequence": "CAAUGUGA", | ||
48 | - "struct2d": "))))))))" | ||
49 | - }, | ||
50 | - "1003": { | ||
51 | - "occurences": 1, | ||
52 | - "pdb": [ | ||
53 | - "4Z4F" | ||
54 | - ], | ||
55 | - "sequence": "UUCACAUUGCCCAAGU&U", | ||
56 | - "struct2d": ".((((((((.......&." | ||
57 | - }, | ||
58 | - "1004": { | ||
59 | - "occurences": 1, | ||
60 | - "pdb": [ | ||
61 | - "4Z7L" | ||
62 | - ], | ||
63 | - "sequence": "GCAAAAUAACAAGC", | ||
64 | - "struct2d": "((..........))" | ||
65 | - }, | ||
66 | - "1005": { | ||
67 | - "occurences": 1, | ||
68 | - "pdb": [ | ||
69 | - "4ZDOB" | ||
70 | - ], | ||
71 | - "sequence": "GCCCGGAUGAUCCUCAGUGGUCUGGGGUGCAG&ACCUGU&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCGG", | ||
72 | - "struct2d": "(((((((.(..((((((..[.)))))).((((&..))))&((((.&.))))..((((..]....))))).)))))" | ||
73 | - }, | ||
74 | - "1006": { | ||
75 | - "occurences": 1, | ||
76 | - "pdb": [ | ||
77 | - "4ZDPA" | ||
78 | - ], | ||
79 | - "sequence": "GCCCGGAUGAU&CAGUGGUCUGGGGUGCAGG&ACCUGUA&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCG", | ||
80 | - "struct2d": "(((((((.(..&(((..[.)))...((((((&.))))))&((((.&.))))..((((..]....))))).))))" | ||
81 | - }, | ||
82 | - "1007": { | ||
83 | - "occurences": 1, | ||
84 | - "pdb": [ | ||
85 | - "4ZDPB" | ||
86 | - ], | ||
87 | - "sequence": "GCCCGGAUGAU&CAGUGGUCUGGGGUGCAGG&ACCUGUA&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCG", | ||
88 | - "struct2d": "(((((((.(..&(((..[.)))...(((((.&..)))))&((((.&.))))..((((..]....))))).))))" | ||
89 | - }, | ||
90 | - "1008": { | ||
91 | - "occurences": 1, | ||
92 | - "pdb": [ | ||
93 | - "4ZLD" | ||
94 | - ], | ||
95 | - "sequence": "UAACUUCUGUGAAGUU", | ||
96 | - "struct2d": ".((((((...))))))" | ||
97 | - } | ||
98 | -} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | -{ | ||
2 | - "1": { | ||
3 | - "occurences": 3, | ||
4 | - "pdb": [ | ||
5 | - "1A1T" | ||
6 | - ], | ||
7 | - "pfam": [ | ||
8 | - [ | ||
9 | - "UNK13" | ||
10 | - ], | ||
11 | - [ | ||
12 | - "PF00539", | ||
13 | - "PF08652" | ||
14 | - ], | ||
15 | - [ | ||
16 | - "PF00098" | ||
17 | - ] | ||
18 | - ], | ||
19 | - "sequence": "ACUAGCGGAGGCUAGU", | ||
20 | - "struct2d": "((((((....))))))" | ||
21 | - }, | ||
22 | - "10006": { | ||
23 | - "occurences": 2, | ||
24 | - "pdb": [ | ||
25 | - "1MNB", | ||
26 | - "2A9X" | ||
27 | - ], | ||
28 | - "pfam": [ | ||
29 | - [ | ||
30 | - "PF00539", | ||
31 | - "PF08652" | ||
32 | - ], | ||
33 | - [ | ||
34 | - "UNK13" | ||
35 | - ] | ||
36 | - ], | ||
37 | - "sequence": "UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA", | ||
38 | - "struct2d": "(((.(((((((....))))))((((....))))))))" | ||
39 | - } | ||
40 | -} |
1 | -{ | ||
2 | - "1":{ | ||
3 | - "occurences":2, | ||
4 | - "pdb":[ | ||
5 | - "1A1T" | ||
6 | - ], | ||
7 | - "pfam":[ | ||
8 | - [ | ||
9 | - "PF00539", | ||
10 | - "PF08652" | ||
11 | - ], | ||
12 | - [ | ||
13 | - "PF00098" | ||
14 | - ] | ||
15 | - ], | ||
16 | - "sequence":"ACUAGCGGAGGCUAGU", | ||
17 | - "struct2d":"((((((....))))))" | ||
18 | - }, | ||
19 | - "10006":{ | ||
20 | - "occurences":2, | ||
21 | - "pdb":[ | ||
22 | - "1MNB", | ||
23 | - "2A9X" | ||
24 | - ], | ||
25 | - "pfam":[ | ||
26 | - [ | ||
27 | - "PF00539", | ||
28 | - "PF08652" | ||
29 | - ], | ||
30 | - [ | ||
31 | - "UNK13" | ||
32 | - ] | ||
33 | - ], | ||
34 | - "sequence":"UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA", | ||
35 | - "struct2d":"(((.(((((((....))))))((((....))))))))" | ||
36 | - } | ||
37 | -} | ||
38 | - | ||
39 | - |
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 | -{ | ||
2 | - "927": { | ||
3 | - "contacts": "..****..**...&..&*****.**&**..*****&******&*******&....&*****&.***.....*******&.*****.....*****", | ||
4 | - "occurences": 1, | ||
5 | - "pdb": [ | ||
6 | - "5JVGA", | ||
7 | - "5JVGB" | ||
8 | - ], | ||
9 | - "pfam": [ | ||
10 | - "PF00181", | ||
11 | - "PF00237", | ||
12 | - "PF00238", | ||
13 | - "PF00252", | ||
14 | - "PF00276", | ||
15 | - "PF00281", | ||
16 | - "PF00297", | ||
17 | - "PF00298", | ||
18 | - "PF00327", | ||
19 | - "PF00347", | ||
20 | - "PF00453", | ||
21 | - "PF00467", | ||
22 | - "PF00468", | ||
23 | - "PF00471", | ||
24 | - "PF00572", | ||
25 | - "PF00573", | ||
26 | - "PF00673", | ||
27 | - "PF00828", | ||
28 | - "PF00829", | ||
29 | - "PF00830", | ||
30 | - "PF00831", | ||
31 | - "PF00861", | ||
32 | - "PF01016", | ||
33 | - "PF01196", | ||
34 | - "PF01245", | ||
35 | - "PF01386", | ||
36 | - "PF01632", | ||
37 | - "PF01783", | ||
38 | - "PF03947", | ||
39 | - "PF14693", | ||
40 | - "PF17136" | ||
41 | - ], | ||
42 | - "sequence": "CCCGUGCCCAUAG&GG&CCACCCCA&CCAUGCCGA&CUGGGU&GUGAAAC&CGCC&AUGAU&CGGACCGCAGGGUCCC&AGUCGGUCAGCGCGGG", | ||
43 | - "struct2d": ".((((((.....(&(.&.(((((((&.(.....).&.)))).&)))...)&).((&.....&.(((((....))))).&....))...))))))." | ||
44 | - }, | ||
45 | - | ||
46 | - "170": { | ||
47 | - "contacts": "*****&......***....**...****............*****.....******.....&.........&..&...*.****", | ||
48 | - "occurences": 1, | ||
49 | - "pdb": [ | ||
50 | - "1WZ2" | ||
51 | - ], | ||
52 | - "pfam": [ | ||
53 | - "PF00133", | ||
54 | - "PF08264" | ||
55 | - ], | ||
56 | - "sequence": "GCGGG&GUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCC&GUUCGAAUC&GC&CCCGCACCA", | ||
57 | - "struct2d": "(((((&(..(((.......[.....))).(((((.......))))).(((....)))...(&(..]....)&))&)))))...." | ||
58 | - }, | ||
59 | - | ||
60 | - "768": { | ||
61 | - "contacts": "..*.&..........************.....................**..&.....&*****&...&***&.....&....", | ||
62 | - "occurences": 1, | ||
63 | - "pdb": [ | ||
64 | - "4UYJ" | ||
65 | - ], | ||
66 | - "pfam": [ | ||
67 | - "PF02290", | ||
68 | - "PF05486" | ||
69 | - ], | ||
70 | - "sequence": "CUAG&CGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGG&CGAAG&CGAGG&GUU&UUC&CCUCG&CGUG", | ||
71 | - "struct2d": "(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&(((((&(..&..)&)))))&)..)" | ||
72 | - }, | ||
73 | - | ||
74 | - "770": { | ||
75 | - "contacts": "..*.&..........************.....................**..&.....&****&...&***&....&....", | ||
76 | - "occurences": 1, | ||
77 | - "pdb": [ | ||
78 | - "4UYK" | ||
79 | - ], | ||
80 | - "pfam": [ | ||
81 | - "PF02290", | ||
82 | - "PF05486" | ||
83 | - ], | ||
84 | - "sequence": "CUAG&CGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGG&CGAAG&CGAG&GUU&UUC&CUCG&CGUG", | ||
85 | - "struct2d": "(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&((((&(..&..)&))))&)..)" | ||
86 | - }, | ||
87 | - "266": { | ||
88 | - "contacts": "***************************************************************************************************", | ||
89 | - "occurences": 1, | ||
90 | - "pdb": [ | ||
91 | - "2GTT" | ||
92 | - ], | ||
93 | - "pfam": [ | ||
94 | - "PF00945" | ||
95 | - ], | ||
96 | - "sequence": "CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC", | ||
97 | - "struct2d": "..................................................................................................." | ||
98 | - }, | ||
99 | - "267": { | ||
100 | - "contacts": "***************************************************************************************************", | ||
101 | - "occurences": 1, | ||
102 | - "pdb": [ | ||
103 | - "2GTT" | ||
104 | - ], | ||
105 | - "pfam": [ | ||
106 | - "PF00945" | ||
107 | - ], | ||
108 | - "sequence": "CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC", | ||
109 | - "struct2d": "..................................................................................................." | ||
110 | - }, | ||
111 | - | ||
112 | - "766": { | ||
113 | - "contacts": "***************************************************..********", | ||
114 | - "occurences": 1, | ||
115 | - "pdb": [ | ||
116 | - "4U7U" | ||
117 | - ], | ||
118 | - "pfam": [ | ||
119 | - "PF08798", | ||
120 | - "PF09344", | ||
121 | - "PF09481", | ||
122 | - "PF09485", | ||
123 | - "PF09704" | ||
124 | - ], | ||
125 | - "sequence": "AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG", | ||
126 | - "struct2d": ".............................................((((((....))))))" | ||
127 | - }, | ||
128 | - | ||
129 | - "851": { | ||
130 | - "contacts": "***************************************************...*******", | ||
131 | - "occurences": 1, | ||
132 | - "pdb": [ | ||
133 | - "5CD4" | ||
134 | - ], | ||
135 | - "pfam": [ | ||
136 | - "PF08798", | ||
137 | - "PF09344", | ||
138 | - "PF09481", | ||
139 | - "PF09485", | ||
140 | - "PF09704" | ||
141 | - ], | ||
142 | - "sequence": "AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG", | ||
143 | - "struct2d": ".............................................((((((....))))))" | ||
144 | - }, | ||
145 | - | ||
146 | - "948": { | ||
147 | - "contacts": "******************************************", | ||
148 | - "occurences": 1, | ||
149 | - "pdb": [ | ||
150 | - "5O7H" | ||
151 | - ], | ||
152 | - "pfam": [ | ||
153 | - "PF09618" | ||
154 | - ], | ||
155 | - "sequence": "CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG", | ||
156 | - "struct2d": "...........................(((((.....)))))" | ||
157 | - }, | ||
158 | - | ||
159 | - "972": { | ||
160 | - "contacts": "******************************************", | ||
161 | - "occurences": 1, | ||
162 | - "pdb": [ | ||
163 | - "5WLH" | ||
164 | - ], | ||
165 | - "pfam": [ | ||
166 | - "UNK81" | ||
167 | - ], | ||
168 | - "sequence": "GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG", | ||
169 | - "struct2d": "......(((((........))))..)................" | ||
170 | - }, | ||
171 | - | ||
172 | - | ||
173 | - | ||
174 | - | ||
175 | - | ||
176 | -} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | -{ | ||
2 | - "159": { | ||
3 | - "contacts": "..&************&**&.&******.*.****..***.****************&..&.****.&...*****...**...*****&....*******", | ||
4 | - "occurences": 1, | ||
5 | - "pdb": [ | ||
6 | - "1VQ6" | ||
7 | - ], | ||
8 | - "pfam": [ | ||
9 | - "PF00181", | ||
10 | - "PF00237", | ||
11 | - "PF00238", | ||
12 | - "PF00252", | ||
13 | - "PF00276", | ||
14 | - "PF00281", | ||
15 | - "PF00297", | ||
16 | - "PF00298", | ||
17 | - "PF00327", | ||
18 | - "PF00347", | ||
19 | - "PF00466", | ||
20 | - "PF00467", | ||
21 | - "PF00572", | ||
22 | - "PF00573", | ||
23 | - "PF00673", | ||
24 | - "PF00827", | ||
25 | - "PF00828", | ||
26 | - "PF00831", | ||
27 | - "PF00832", | ||
28 | - "PF00935", | ||
29 | - "PF01157", | ||
30 | - "PF01198", | ||
31 | - "PF01246", | ||
32 | - "PF01248", | ||
33 | - "PF01280", | ||
34 | - "PF01655", | ||
35 | - "PF01780", | ||
36 | - "PF01907", | ||
37 | - "PF03947", | ||
38 | - "PF16906", | ||
39 | - "PF17144" | ||
40 | - ], | ||
41 | - "sequence": "UU&GGCGGCCACAGC&GU&G&GCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&AC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA", | ||
42 | - "struct2d": "..&((((((....((&((&(&...((((((...(.....)...))))..))....)&))&))...(&((((((.((....))))))))&)...))))))." | ||
43 | - }, | ||
44 | - "122": { | ||
45 | - "contacts": "***************&.&*&*****.******..***.****************&...&.****.&...*****..***...*****&....*******", | ||
46 | - "occurences": 1, | ||
47 | - "pdb": [ | ||
48 | - "1Q81", | ||
49 | - "1Q82", | ||
50 | - "3CPW" | ||
51 | - ], | ||
52 | - "pfam": [ | ||
53 | - "PF00181", | ||
54 | - "PF00237", | ||
55 | - "PF00238", | ||
56 | - "PF00252", | ||
57 | - "PF00276", | ||
58 | - "PF00281", | ||
59 | - "PF00297", | ||
60 | - "PF00327", | ||
61 | - "PF00347", | ||
62 | - "PF00466", | ||
63 | - "PF00467", | ||
64 | - "PF00572", | ||
65 | - "PF00573", | ||
66 | - "PF00673", | ||
67 | - "PF00827", | ||
68 | - "PF00828", | ||
69 | - "PF00831", | ||
70 | - "PF00832", | ||
71 | - "PF00935", | ||
72 | - "PF01157", | ||
73 | - "PF01198", | ||
74 | - "PF01246", | ||
75 | - "PF01248", | ||
76 | - "PF01280", | ||
77 | - "PF01655", | ||
78 | - "PF01780", | ||
79 | - "PF01907", | ||
80 | - "PF03947", | ||
81 | - "PF16906", | ||
82 | - "PF17144" | ||
83 | - ], | ||
84 | - "sequence": "GGCGGCCACAGCGGU&G&U&CCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&ACC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA", | ||
85 | - "struct2d": "((((((....(((((&(&.&..((((((...(.....)...))))..))....)&)))&))...(&((((((.((....))))))))&)...))))))." | ||
86 | - }, | ||
87 | - "264": { | ||
88 | - "contacts": "***************************************************************************************************", | ||
89 | - "occurences": 1, | ||
90 | - "pdb": [ | ||
91 | - "2GTT" | ||
92 | - ], | ||
93 | - "pfam": [ | ||
94 | - "PF00945" | ||
95 | - ], | ||
96 | - "sequence": "CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC", | ||
97 | - "struct2d": "..................................................................................................." | ||
98 | - }, | ||
99 | - "265": { | ||
100 | - "contacts": "***************************************************************************************************", | ||
101 | - "occurences": 1, | ||
102 | - "pdb": [ | ||
103 | - "2GTT" | ||
104 | - ], | ||
105 | - "pfam": [ | ||
106 | - "PF00945" | ||
107 | - ], | ||
108 | - "sequence": "CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC", | ||
109 | - "struct2d": "..................................................................................................." | ||
110 | - }, | ||
111 | - "109": { | ||
112 | - "contacts": "***************&.&************..***.****************&...&.****.&...*****...**...*****&....*******", | ||
113 | - "occurences": 2, | ||
114 | - "pdb": [ | ||
115 | - "1N8R", | ||
116 | - "1W2B" | ||
117 | - ], | ||
118 | - "pfam": [ | ||
119 | - "PF00181", | ||
120 | - "PF00237", | ||
121 | - "PF00238", | ||
122 | - "PF00252", | ||
123 | - "PF00276", | ||
124 | - "PF00281", | ||
125 | - "PF00297", | ||
126 | - "PF00327", | ||
127 | - "PF00347", | ||
128 | - "PF00466", | ||
129 | - "PF00467", | ||
130 | - "PF00572", | ||
131 | - "PF00573", | ||
132 | - "PF00673", | ||
133 | - "PF00827", | ||
134 | - "PF00828", | ||
135 | - "PF00831", | ||
136 | - "PF00832", | ||
137 | - "PF00935", | ||
138 | - "PF01157", | ||
139 | - "PF01198", | ||
140 | - "PF01246", | ||
141 | - "PF01248", | ||
142 | - "PF01280", | ||
143 | - "PF01655", | ||
144 | - "PF01780", | ||
145 | - "PF01907", | ||
146 | - "PF03947", | ||
147 | - "PF05697", | ||
148 | - "PF16906", | ||
149 | - "PF17144" | ||
150 | - ], | ||
151 | - "sequence": "GGCGGCCACAGCGGU&G&CCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&ACC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA", | ||
152 | - "struct2d": "((((((....(((((&(&..((((((...(.....)...))))..))....)&)))&))...(&((((((.((....))))))))&)...))))))." | ||
153 | - }, | ||
154 | - "762": { | ||
155 | - "contacts": "***************************************************..********", | ||
156 | - "occurences": 1, | ||
157 | - "pdb": [ | ||
158 | - "4U7U" | ||
159 | - ], | ||
160 | - "pfam": [ | ||
161 | - "PF08798", | ||
162 | - "PF09344", | ||
163 | - "PF09481", | ||
164 | - "PF09485", | ||
165 | - "PF09704" | ||
166 | - ], | ||
167 | - "sequence": "AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG", | ||
168 | - "struct2d": ".............................................((((((....))))))" | ||
169 | - }, | ||
170 | - "968": { | ||
171 | - "contacts": "******************************************", | ||
172 | - "occurences": 1, | ||
173 | - "pdb": [ | ||
174 | - "5WLH" | ||
175 | - ], | ||
176 | - "pfam": [ | ||
177 | - "UNK81" | ||
178 | - ], | ||
179 | - "sequence": "GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG", | ||
180 | - "struct2d": "......(((((........))))..)................" | ||
181 | - }, | ||
182 | - "962": { | ||
183 | - "contacts": "*****************************************", | ||
184 | - "occurences": 2, | ||
185 | - "pdb": [ | ||
186 | - "5W1H" | ||
187 | - ], | ||
188 | - "pfam": [ | ||
189 | - "UNK75" | ||
190 | - ], | ||
191 | - "sequence": "AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG", | ||
192 | - "struct2d": ".....(((((........))))..)................" | ||
193 | - }, | ||
194 | - "62": { | ||
195 | - "contacts": ".*****..********.......*****.**....**", | ||
196 | - "occurences": 1, | ||
197 | - "pdb": [ | ||
198 | - "1I6U" | ||
199 | - ], | ||
200 | - "pfam": [ | ||
201 | - "PF00410" | ||
202 | - ], | ||
203 | - "sequence": "GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC", | ||
204 | - "struct2d": "(((((((((.((((((....))))..)))))))))))" | ||
205 | - }, | ||
206 | -} |
This diff could not be displayed because it is too large.
1 | -{ | ||
2 | - "1": { | ||
3 | - "occurences": 2, | ||
4 | - "pdb": [ | ||
5 | - "1A1T" | ||
6 | - ], | ||
7 | - "pfam": [ | ||
8 | - "PF00098" | ||
9 | - ], | ||
10 | - "sequence": "ACUAGCGGAGGCUAGU", | ||
11 | - "struct2d": "((((((....))))))" | ||
12 | - }, | ||
13 | - "100006": { | ||
14 | - "occurences": 2, | ||
15 | - "pdb": [ | ||
16 | - "1MNB", | ||
17 | - "2A9X" | ||
18 | - ], | ||
19 | - "pfam": [ | ||
20 | - "PF00539", | ||
21 | - "UNK13" | ||
22 | - ], | ||
23 | - "sequence": "UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA", | ||
24 | - "struct2d": "(((.(((((((....))))))((((....))))))))" | ||
25 | - }, | ||
26 | - "104": { | ||
27 | - "occurences": 3, | ||
28 | - "pdb": [ | ||
29 | - "1MNB", | ||
30 | - "2A9X" | ||
31 | - ], | ||
32 | - "pfam": [ | ||
33 | - "PF00539", | ||
34 | - "UNK13" | ||
35 | - ], | ||
36 | - "sequence": "UCGUG&AGCUCAUUAGCUCCGA", | ||
37 | - "struct2d": "(((.(&((((....))))))))" | ||
38 | - } | ||
39 | -} |
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
1 | -{ | ||
2 | - "103": { | ||
3 | - "occurences": 1, | ||
4 | - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA", | ||
5 | - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
6 | - }, | ||
7 | - "1103": { | ||
8 | - "occurences": 1, | ||
9 | - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA", | ||
10 | - "struct2d": "(((((((..((((.....[..))))..(((.........)))......(((((..]....))))))))))))...." | ||
11 | - }, | ||
12 | - "1104": { | ||
13 | - "occurences": 1, | ||
14 | - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA", | ||
15 | - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
16 | - }, | ||
17 | - "111": { | ||
18 | - "occurences": 1, | ||
19 | - "sequence": "GCCGAUAUAGCUCAGUUGGUAGAGCAGCGCAUUCGUAAUGCGAAGGUCGUAGGUUCGACUCCUAUUAUCGGCACCA", | ||
20 | - "struct2d": "(((((((..((((.....[..))))..((((.......))))......(((((..]....))))))))))))...." | ||
21 | - }, | ||
22 | - "141": { | ||
23 | - "occurences": 1, | ||
24 | - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA", | ||
25 | - "struct2d": "(((((((..((((.....[..)))).((((.........)))).....(((((..]....))))))))))))...." | ||
26 | - }, | ||
27 | - "16": { | ||
28 | - "occurences": 1, | ||
29 | - "sequence": "GGAGCGGUAGUUCAGUCGGUUAGAAUACCUGCCUGUCACGCAGGGGGUCGCGGGUUCGAGUCCCGUCCGUUCCGCCA", | ||
30 | - "struct2d": "(((((((..((((.....[...)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
31 | - }, | ||
32 | - "281": { | ||
33 | - "occurences": 1, | ||
34 | - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA", | ||
35 | - "struct2d": ".((((((..((((......[..)))).(((((.......))))).....(((((..]....)))))))))))....." | ||
36 | - }, | ||
37 | - "282": { | ||
38 | - "occurences": 1, | ||
39 | - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA", | ||
40 | - "struct2d": ".((((((..((((.........)))).(((((.......))))).....(((((.......)))))))))))....." | ||
41 | - }, | ||
42 | - "37": { | ||
43 | - "occurences": 1, | ||
44 | - "sequence": "GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA", | ||
45 | - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
46 | - }, | ||
47 | - "453": { | ||
48 | - "occurences": 1, | ||
49 | - "sequence": "GGACCGGUAGCCUAGCCAGGACAGGGCGGCGGCCUCCUAAGCCGCAGGUCCGGGGUUCAAAUCCCCGCCGGUCCGC", | ||
50 | - "struct2d": "(((((((..((((......[...)))).(((((.......))))).....(((((..]....)))))))))))).." | ||
51 | - }, | ||
52 | - "454": { | ||
53 | - "occurences": 1, | ||
54 | - "sequence": "GGGUCGCCAAGCCUGGCCAAAGGCGCUGGGCCUAGGACCCAGUCCCGUAGGGGUUCCAGGGUUCAAAUCCCUGCCCC", | ||
55 | - "struct2d": "(((..(((.......[.....)))((((((.......))))))(((....)))...(.(((..]....))).))))." | ||
56 | - }, | ||
57 | - "46": { | ||
58 | - "occurences": 1, | ||
59 | - "sequence": "UUCCUCGUGGCCCAAUGGUCACGGCGUCUGGCUGCGAACCAGAAGAUUCCAGGUUCAAGUCCUGGCGGGGAAGCCA", | ||
60 | - "struct2d": "(((((((..(((.....[....))).(((((.......))))).....(((((..]....))))))))))))...." | ||
61 | - }, | ||
62 | - "470": { | ||
63 | - "occurences": 1, | ||
64 | - "sequence": "GGGCCCGUAGCUUAGCCAGGUCAGAGCGCCCGGCUCAUAACCGGGCGGUCGAGGGUUCGAAUCCCUCCGGGCCCACCA", | ||
65 | - "struct2d": "(((((((..((((......[...)))).((((.........)))).....(((((..]....))))))))))))...." | ||
66 | - }, | ||
67 | - "536": { | ||
68 | - "occurences": 2, | ||
69 | - "sequence": "CUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAA", | ||
70 | - "struct2d": "[[[[[(...).(.((((((((((((.....)))))..((((.]]]]]))))((.((((......)))).)))))))))" | ||
71 | - }, | ||
72 | - "645": { | ||
73 | - "occurences": 1, | ||
74 | - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA", | ||
75 | - "struct2d": "...((((....((.........))...(.(((.......))).).....(((((.......)))))))))......." | ||
76 | - }, | ||
77 | - "671": { | ||
78 | - "occurences": 1, | ||
79 | - "sequence": "GGGAUGCGUAGGAUAGGUGGGAGCCUGUGAACCCCCGCCUCCGGGUGGGGGGGAGGCGCCGGUGAAAUACCACCCUUCCC", | ||
80 | - "struct2d": "((((.(......[..(((((..(((......((((((((....))))))))...)))..........].)))))).))))" | ||
81 | - }, | ||
82 | - "680": { | ||
83 | - "occurences": 1, | ||
84 | - "sequence": "GGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUC", | ||
85 | - "struct2d": "((.((((..[.))))))((((((.......))))).)((((.((((....)))).)))).(((((..]....)))))" | ||
86 | - }, | ||
87 | - "72": { | ||
88 | - "occurences": 1, | ||
89 | - "sequence": "GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA", | ||
90 | - "struct2d": "(((((((................(((..((((.......))))...)))(((((.......))))))))))))...." | ||
91 | - }, | ||
92 | - "955": { | ||
93 | - "occurences": 1, | ||
94 | - "sequence": "GGGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCCACC", | ||
95 | - "struct2d": ".((((.((..((((........)))).(.((.........)).).....(((((.......))))))).))))...." | ||
96 | - }, | ||
97 | - "985": { | ||
98 | - "occurences": 1, | ||
99 | - "sequence": "UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU", | ||
100 | - "struct2d": ".............................................................................." | ||
101 | - } | ||
102 | -} |
1 | -{ | ||
2 | - "103": { | ||
3 | - "occurences": 1, | ||
4 | - "pdb": [ | ||
5 | - "1TTT" | ||
6 | - ], | ||
7 | - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA", | ||
8 | - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
9 | - }, | ||
10 | - "1103": { | ||
11 | - "occurences": 1, | ||
12 | - "pdb": [ | ||
13 | - "5HC9" | ||
14 | - ], | ||
15 | - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA", | ||
16 | - "struct2d": "(((((((..((((.....[..))))..(((.........)))......(((((..]....))))))))))))...." | ||
17 | - }, | ||
18 | - "1104": { | ||
19 | - "occurences": 1, | ||
20 | - "pdb": [ | ||
21 | - "5HC9" | ||
22 | - ], | ||
23 | - "sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA", | ||
24 | - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
25 | - }, | ||
26 | - "111": { | ||
27 | - "occurences": 1, | ||
28 | - "pdb": [ | ||
29 | - "1QF6" | ||
30 | - ], | ||
31 | - "sequence": "GCCGAUAUAGCUCAGUUGGUAGAGCAGCGCAUUCGUAAUGCGAAGGUCGUAGGUUCGACUCCUAUUAUCGGCACCA", | ||
32 | - "struct2d": "(((((((..((((.....[..))))..((((.......))))......(((((..]....))))))))))))...." | ||
33 | - }, | ||
34 | - "141": { | ||
35 | - "occurences": 1, | ||
36 | - "pdb": [ | ||
37 | - "1TTT" | ||
38 | - ], | ||
39 | - "sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA", | ||
40 | - "struct2d": "(((((((..((((.....[..)))).((((.........)))).....(((((..]....))))))))))))...." | ||
41 | - }, | ||
42 | - "16": { | ||
43 | - "occurences": 1, | ||
44 | - "pdb": [ | ||
45 | - "1C0A" | ||
46 | - ], | ||
47 | - "sequence": "GGAGCGGUAGUUCAGUCGGUUAGAAUACCUGCCUGUCACGCAGGGGGUCGCGGGUUCGAGUCCCGUCCGUUCCGCCA", | ||
48 | - "struct2d": "(((((((..((((.....[...)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
49 | - }, | ||
50 | - "281": { | ||
51 | - "occurences": 1, | ||
52 | - "pdb": [ | ||
53 | - "2FMT" | ||
54 | - ], | ||
55 | - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA", | ||
56 | - "struct2d": ".((((((..((((......[..)))).(((((.......))))).....(((((..]....)))))))))))....." | ||
57 | - }, | ||
58 | - "282": { | ||
59 | - "occurences": 1, | ||
60 | - "pdb": [ | ||
61 | - "2FMT" | ||
62 | - ], | ||
63 | - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA", | ||
64 | - "struct2d": ".((((((..((((.........)))).(((((.......))))).....(((((.......)))))))))))....." | ||
65 | - }, | ||
66 | - "37": { | ||
67 | - "occurences": 1, | ||
68 | - "pdb": [ | ||
69 | - "1EIY" | ||
70 | - ], | ||
71 | - "sequence": "GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA", | ||
72 | - "struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...." | ||
73 | - }, | ||
74 | - "453": { | ||
75 | - "occurences": 1, | ||
76 | - "pdb": [ | ||
77 | - "2ZUFB" | ||
78 | - ], | ||
79 | - "sequence": "GGACCGGUAGCCUAGCCAGGACAGGGCGGCGGCCUCCUAAGCCGCAGGUCCGGGGUUCAAAUCCCCGCCGGUCCGC", | ||
80 | - "struct2d": "(((((((..((((......[...)))).(((((.......))))).....(((((..]....)))))))))))).." | ||
81 | - }, | ||
82 | - "454": { | ||
83 | - "occurences": 1, | ||
84 | - "pdb": [ | ||
85 | - "2ZZM" | ||
86 | - ], | ||
87 | - "sequence": "GGGUCGCCAAGCCUGGCCAAAGGCGCUGGGCCUAGGACCCAGUCCCGUAGGGGUUCCAGGGUUCAAAUCCCUGCCCC", | ||
88 | - "struct2d": "(((..(((.......[.....)))((((((.......))))))(((....)))...(.(((..]....))).))))." | ||
89 | - }, | ||
90 | - "46": { | ||
91 | - "occurences": 1, | ||
92 | - "pdb": [ | ||
93 | - "1F7U" | ||
94 | - ], | ||
95 | - "sequence": "UUCCUCGUGGCCCAAUGGUCACGGCGUCUGGCUGCGAACCAGAAGAUUCCAGGUUCAAGUCCUGGCGGGGAAGCCA", | ||
96 | - "struct2d": "(((((((..(((.....[....))).(((((.......))))).....(((((..]....))))))))))))...." | ||
97 | - }, | ||
98 | - "470": { | ||
99 | - "occurences": 1, | ||
100 | - "pdb": [ | ||
101 | - "3AMU" | ||
102 | - ], | ||
103 | - "sequence": "GGGCCCGUAGCUUAGCCAGGUCAGAGCGCCCGGCUCAUAACCGGGCGGUCGAGGGUUCGAAUCCCUCCGGGCCCACCA", | ||
104 | - "struct2d": "(((((((..((((......[...)))).((((.........)))).....(((((..]....))))))))))))...." | ||
105 | - }, | ||
106 | - "536": { | ||
107 | - "occurences": 2, | ||
108 | - "pdb": [ | ||
109 | - "3IVKB" | ||
110 | - ], | ||
111 | - "sequence": "CUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAA", | ||
112 | - "struct2d": "[[[[[(...).(.((((((((((((.....)))))..((((.]]]]]))))((.((((......)))).)))))))))" | ||
113 | - }, | ||
114 | - "645": { | ||
115 | - "occurences": 1, | ||
116 | - "pdb": [ | ||
117 | - "3QSY" | ||
118 | - ], | ||
119 | - "sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA", | ||
120 | - "struct2d": "...((((....((.........))...(.(((.......))).).....(((((.......)))))))))......." | ||
121 | - }, | ||
122 | - "671": { | ||
123 | - "occurences": 1, | ||
124 | - "pdb": [ | ||
125 | - "3UMY" | ||
126 | - ], | ||
127 | - "sequence": "GGGAUGCGUAGGAUAGGUGGGAGCCUGUGAACCCCCGCCUCCGGGUGGGGGGGAGGCGCCGGUGAAAUACCACCCUUCCC", | ||
128 | - "struct2d": "((((.(......[..(((((..(((......((((((((....))))))))...)))..........].)))))).))))" | ||
129 | - }, | ||
130 | - "680": { | ||
131 | - "occurences": 1, | ||
132 | - "pdb": [ | ||
133 | - "3W3S" | ||
134 | - ], | ||
135 | - "sequence": "GGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUC", | ||
136 | - "struct2d": "((.((((..[.))))))((((((.......))))).)((((.((((....)))).)))).(((((..]....)))))" | ||
137 | - }, | ||
138 | - "72": { | ||
139 | - "occurences": 1, | ||
140 | - "pdb": [ | ||
141 | - "1J2B" | ||
142 | - ], | ||
143 | - "sequence": "GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA", | ||
144 | - "struct2d": "(((((((................(((..((((.......))))...)))(((((.......))))))))))))...." | ||
145 | - }, | ||
146 | - "955": { | ||
147 | - "occurences": 1, | ||
148 | - "pdb": [ | ||
149 | - "4X0B" | ||
150 | - ], | ||
151 | - "sequence": "GGGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCCACC", | ||
152 | - "struct2d": ".((((.((..((((........)))).(.((.........)).).....(((((.......))))))).))))...." | ||
153 | - }, | ||
154 | - "985": { | ||
155 | - "occurences": 1, | ||
156 | - "pdb": [ | ||
157 | - "4XJN" | ||
158 | - ], | ||
159 | - "sequence": "UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU", | ||
160 | - "struct2d": ".............................................................................." | ||
161 | - } | ||
162 | -} |
This diff could not be displayed because it is too large.
1 | ->test_CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE__PDB_00376 | ||
2 | -.......***.......................******.................................. | ||
3 | -GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA | ||
4 | -((((((..(((.........)))((((((((...))))))))...(((((.......)))))))))))..... | ||
5 | ->test_GUANINE_RIBOSWITCH_U22C,_A52G_MUTANT_BOUND_TO_HYPOXANTHINE__PDB_01023 | ||
6 | -............................*********.............................** | ||
7 | -GGACAUACAAUCGCGUGGAUAUGGCACGCAAGUUUCUGCCGGGCACCGUAAAUGUCCGACUAUGUCCa | ||
8 | -(((((((...(((((((.[[..[[)))))))........((((((]]...]]))))))..))))))). | ||
9 | ->test_SOLUTION_STRUCTURE_OF_THE_P2B-P3_PSEUDOKNOT_FROM_HUMAN_TELOMERASE_RNA__PDB_00857 | ||
10 | -.............................*****............. | ||
11 | -GGGCUGUUUUUCUCGCUGACUUUCAGCCCCAAACAAAAAAGUCAGCA | ||
12 | -[[[[[[........(((((((((]]]]]]........))))))))). |
data/modules/ISAURE/Readme.md
0 → 100644
1 | +The motif library used with --contacts is a special case: it was provided by Isaure Chauvot de Beauchêne from the LORIA | ||
2 | +laboratory. These motifs are made up of RNA fragments linked to proteins. | ||
3 | +================================================================================================================== | ||
4 | + | ||
5 | +Several versions of this motif library have been provided, but the most complete is the latest: 'motifs_06-06-2021.json'. | ||
6 | +The current scripts were written for this file and do not work with the older libraries. | ||
7 | + | ||
8 | +There are also 2 benchmark files, also in JSON format: 'benchmark_16-06-2021.json' and 'benchmark_16-07-2021.json'. | ||
9 | +They contain complete RNA sequences that bind to a protein; the first one contains only 33 RNAs, and the second one | ||
10 | +contains 130 RNAs. | ||
11 | + | ||
12 | +The benchmark.dbn and benchmark.txt were created based on the 'benchmark_16-07-2021.json'. | ||
13 | +They are mostly used for the Isaure_benchmark.py script and scripts from the 'scripts' directory. | ||
14 | + | ||
15 | +The motifs_final.json file is obtained by running the count_pattern.cpp script from the 'scripts' directory on | ||
16 | +the 'motifs_06-06-2021.json' motifs file. | ||
17 | +This script counts the number of "occurrences" of each motif: we consider that if the sequence of motif A | ||
18 | +is included in motif B, then for each inclusion of B we also have an inclusion of A. And vice versa. | ||
19 | + | ||
20 | +The motif library used by BiORSEO is the one in the 'bibliotheque_a_lire' directory. It should contain only | ||
21 | +the JSON file that BiORSEO is meant to use for its prediction. That's why you shouldn't put any other type of file there! | ||
22 | + | ||
23 | + | ||
24 | + | ||
25 | + | ||
26 | + | ||
27 | + |
File moved
File moved
File moved
... | @@ -22341,23 +22341,6 @@ | ... | @@ -22341,23 +22341,6 @@ |
22341 | "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA", | 22341 | "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA", |
22342 | "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...." | 22342 | "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...." |
22343 | }, | 22343 | }, |
22344 | - "977": { | ||
22345 | - "contacts": "******************..&****************.**************&*************&*", | ||
22346 | - "occurences": 1, | ||
22347 | - "pdb": [ | ||
22348 | - "5XBL" | ||
22349 | - ], | ||
22350 | - "pfam": [ | ||
22351 | - [ | ||
22352 | - "PF16592", | ||
22353 | - "PF16593", | ||
22354 | - "PF16595", | ||
22355 | - "PF13395" | ||
22356 | - ] | ||
22357 | - ], | ||
22358 | - "sequence": "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U", | ||
22359 | - "struct2d": "..........(((((...((&)).....)))))..(.(..)..).......(&).....(.....)&." | ||
22360 | - }, | ||
22361 | "978": { | 22344 | "978": { |
22362 | "contacts": "*****", | 22345 | "contacts": "*****", |
22363 | "occurences": 9, | 22346 | "occurences": 9, | ... | ... |
... | @@ -22341,23 +22341,6 @@ | ... | @@ -22341,23 +22341,6 @@ |
22341 | "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA", | 22341 | "sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA", |
22342 | "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...." | 22342 | "struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...." |
22343 | }, | 22343 | }, |
22344 | - "977": { | ||
22345 | - "contacts": "******************..&****************.**************&*************&*", | ||
22346 | - "occurences": 1, | ||
22347 | - "pdb": [ | ||
22348 | - "5XBL" | ||
22349 | - ], | ||
22350 | - "pfam": [ | ||
22351 | - [ | ||
22352 | - "PF16592", | ||
22353 | - "PF16593", | ||
22354 | - "PF16595", | ||
22355 | - "PF13395" | ||
22356 | - ] | ||
22357 | - ], | ||
22358 | - "sequence": "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U", | ||
22359 | - "struct2d": "..........(((((...((&)).....)))))..(.(..)..).......(&).....(.....)&." | ||
22360 | - }, | ||
22361 | "978": { | 22344 | "978": { |
22362 | "contacts": "*****", | 22345 | "contacts": "*****", |
22363 | "occurences": 9, | 22346 | "occurences": 9, | ... | ... |
... | @@ -6,6 +6,9 @@ import seaborn as sns | ... | @@ -6,6 +6,9 @@ import seaborn as sns |
6 | import pandas as pd | 6 | import pandas as pd |
7 | import matplotlib.pylab as plt | 7 | import matplotlib.pylab as plt |
8 | 8 | ||
9 | +# Retrieve for each RNA the best value for MEA and compare this energy value with the one obtained with | ||
10 | +# RNAeval and RNAfold from the ViennaRNA Package 2.0 (Ronny Lorenz et al., 2011) | ||
11 | +# After getting those values, it creates a figure. | ||
9 | def get_result_MEA(filename): | 12 | def get_result_MEA(filename): |
10 | ext = "json_pmE" | 13 | ext = "json_pmE" |
11 | file2 = open( "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/" + filename + ext, "r") | 14 | file2 = open( "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/" + filename + ext, "r") | ... | ... |
scripts/Stats.py
deleted
100644 → 0
1 | -from math import sqrt, ceil | ||
2 | -import numpy as np | ||
3 | -import matplotlib.pyplot as plt | ||
4 | - | ||
5 | -file = open("/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn", "r") | ||
6 | -name = file.readline() | ||
7 | -rna = file.readline() | ||
8 | -twod = file.readline() | ||
9 | -contacts = file.readline() | ||
10 | -length = len(rna) | ||
11 | -nb_ctc = contacts.count('*') | ||
12 | -print("--------------------------------------------------------") | ||
13 | - | ||
14 | -ctc_max = nb_ctc | ||
15 | -ctc_min = nb_ctc | ||
16 | - | ||
17 | -np_lgt = [] | ||
18 | -np_lgt.append(length) | ||
19 | - | ||
20 | -np_ctc = [] | ||
21 | -np_ctc.append(nb_ctc) | ||
22 | - | ||
23 | -np = [] | ||
24 | -np.append([length, nb_ctc]) | ||
25 | - | ||
26 | -while name: | ||
27 | - print(contacts) | ||
28 | - print(length) | ||
29 | - print(nb_ctc) | ||
30 | - print("--------------------------------------------------------") | ||
31 | - | ||
32 | - name = file.readline() | ||
33 | - rna = file.readline() | ||
34 | - length = len(rna) | ||
35 | - if length != 0 : | ||
36 | - np_lgt.append(length) | ||
37 | - twod = file.readline() | ||
38 | - contacts = file.readline() | ||
39 | - nb_ctc = contacts.count('*') | ||
40 | - if nb_ctc != 0: | ||
41 | - np_ctc.append(nb_ctc) | ||
42 | - np.append([length, nb_ctc]) | ||
43 | - if nb_ctc > ctc_max: | ||
44 | - ctc_max = nb_ctc | ||
45 | - if nb_ctc < ctc_min and nb_ctc != 0: | ||
46 | - ctc_min = nb_ctc | ||
47 | -file.close() | ||
48 | -print(np_lgt) | ||
49 | -print(np_ctc) | ||
50 | -print(np) | ||
51 | - | ||
52 | -x = np_lgt | ||
53 | -y = np_ctc | ||
54 | - | ||
55 | -index = np_ctc.index(ctc_max) | ||
56 | -index2 = np_ctc.index(ctc_min) | ||
57 | - | ||
58 | -plt.scatter(x, y, c = 'blue') | ||
59 | -plt.annotate("(" + str(np_lgt[index]) + "," + str(ctc_max) + ")", (np_lgt[index], ctc_max),c ='red') | ||
60 | -plt.scatter(np_lgt[index], ctc_max,c = 'red') | ||
61 | -plt.annotate("(" + str(np_lgt[index2]) + "," + str(ctc_min) + ")", (np_lgt[index2], ctc_min),c ='green') | ||
62 | -plt.scatter(np_lgt[index2], ctc_min,c = 'green') | ||
63 | - | ||
64 | -plt.xlabel('longeur de l\'arn') | ||
65 | -plt.ylabel('nombre de contacts') | ||
66 | -plt.savefig('stats.png') |
... | @@ -11,6 +11,7 @@ | ... | @@ -11,6 +11,7 @@ |
11 | using namespace std; | 11 | using namespace std; |
12 | using json = nlohmann::json; | 12 | using json = nlohmann::json; |
13 | 13 | ||
14 | +//Count the number of '&' in the motif sequence | ||
14 | size_t count_delimiter(string& seq) { | 15 | size_t count_delimiter(string& seq) { |
15 | size_t count = 0; | 16 | size_t count = 0; |
16 | for(uint i = 0; i < seq.size(); i++) { | 17 | for(uint i = 0; i < seq.size(); i++) { |
... | @@ -22,6 +23,10 @@ size_t count_delimiter(string& seq) { | ... | @@ -22,6 +23,10 @@ size_t count_delimiter(string& seq) { |
22 | return count; | 23 | return count; |
23 | } | 24 | } |
24 | 25 | ||
26 | +/* | ||
27 | +If there is a '&' in the motif sequence in the field 'sequence' but not in the field 'contacts', | ||
28 | +the script puts a '&' at the same position in the field 'contacts' as in the field 'sequence'. | ||
29 | +*/ | ||
25 | void add_delimiter(const string& jsonfile, const string& jsonoutfile) { | 30 | void add_delimiter(const string& jsonfile, const string& jsonoutfile) { |
26 | std::ifstream lib(jsonfile); | 31 | std::ifstream lib(jsonfile); |
27 | 32 | ||
... | @@ -77,13 +82,9 @@ void add_delimiter(const string& jsonfile, const string& jsonoutfile) { | ... | @@ -77,13 +82,9 @@ void add_delimiter(const string& jsonfile, const string& jsonoutfile) { |
77 | 82 | ||
78 | int main() | 83 | int main() |
79 | { | 84 | { |
80 | - //183 | 85 | + string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_06-06-2021.json"; |
81 | - //cout << "------------------BEGIN-----------------" << endl; | 86 | + string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_tmp.json"; |
82 | - string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json"; | ||
83 | - string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_tmp.json"; | ||
84 | add_delimiter(jsonfile, out); | 87 | add_delimiter(jsonfile, out); |
85 | - | ||
86 | - //cout << "------------------END-----------------" << endl; | ||
87 | return 0; | 88 | return 0; |
88 | } | 89 | } |
89 | 90 | ... | ... |
... | @@ -11,6 +11,12 @@ | ... | @@ -11,6 +11,12 @@ |
11 | using namespace std; | 11 | using namespace std; |
12 | using json = nlohmann::json; | 12 | using json = nlohmann::json; |
13 | 13 | ||
14 | +/* | ||
15 | +This script counts the number of "occurrences" of the motif. | ||
16 | +So we consider that if the sequence of pattern A is included in pattern B, | ||
17 | +then for each inclusion of B we also have an inclusion of A. And vice versa. | ||
18 | +*/ | ||
19 | + | ||
14 | //Return true if the first sequence seq1 is included in the second sequence seq2 | 20 | //Return true if the first sequence seq1 is included in the second sequence seq2 |
15 | //if not return false | 21 | //if not return false |
16 | int is_contains(string& seq1, string& seq2) { | 22 | int is_contains(string& seq1, string& seq2) { |
... | @@ -38,6 +44,8 @@ int is_contains(string& seq1, string& seq2) { | ... | @@ -38,6 +44,8 @@ int is_contains(string& seq1, string& seq2) { |
38 | 44 | ||
39 | //If we find the sequence and structure of pattern A in pattern B, we have to concatenate the pfam lists of A and B, | 45 | //If we find the sequence and structure of pattern A in pattern B, we have to concatenate the pfam lists of A and B, |
40 | //remove the duplicates, assign this new list of pfam lists to A, and assign as occurrence to A the size of this list. | 46 | //remove the duplicates, assign this new list of pfam lists to A, and assign as occurrence to A the size of this list. |
47 | +//The pattern A is counted only once in every other pattern, i.e. even if the sequence of A is found several times in B, | ||
48 | +// it will be added only once in the occurrences of A. | ||
41 | void counting_occurences(const string& jsonfile, const string& jsonoutfile) { | 49 | void counting_occurences(const string& jsonfile, const string& jsonoutfile) { |
42 | std::ifstream lib(jsonfile); | 50 | std::ifstream lib(jsonfile); |
43 | std::ifstream lib2(jsonfile); | 51 | std::ifstream lib2(jsonfile); |
... | @@ -73,14 +81,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { | ... | @@ -73,14 +81,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { |
73 | if (!test.compare("pfam")) { | 81 | if (!test.compare("pfam")) { |
74 | vector<vector<string>> tab = it2.value(); | 82 | vector<vector<string>> tab = it2.value(); |
75 | list_pfams = tab; | 83 | list_pfams = tab; |
76 | - /*set<set<string>>::iterator iit; | ||
77 | - set<string>::iterator iit2; | ||
78 | - for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) { | ||
79 | - for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) { | ||
80 | - cout << *iit2 << endl; | ||
81 | - } | ||
82 | - cout << endl << endl; | ||
83 | - }*/ | ||
84 | } else if (!test.compare("sequence")) { | 84 | } else if (!test.compare("sequence")) { |
85 | //cout << "sequence: " << it2.value() << endl; | 85 | //cout << "sequence: " << it2.value() << endl; |
86 | sequence = it2.value(); | 86 | sequence = it2.value(); |
... | @@ -124,7 +124,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { | ... | @@ -124,7 +124,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { |
124 | new_id[test] = it2.value(); | 124 | new_id[test] = it2.value(); |
125 | } | 125 | } |
126 | } | 126 | } |
127 | - //cout << "-------begin---------" << endl; | ||
128 | 127 | ||
129 | for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) { | 128 | for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) { |
130 | string id2 = it3.key(); | 129 | string id2 = it3.key(); |
... | @@ -142,22 +141,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { | ... | @@ -142,22 +141,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { |
142 | if (!test.compare("pfam")) { | 141 | if (!test.compare("pfam")) { |
143 | vector<vector<string>> tab = it4.value(); | 142 | vector<vector<string>> tab = it4.value(); |
144 | list_pfams2 = tab; | 143 | list_pfams2 = tab; |
145 | - /*for (uint k = 0; k < tab2.size(); k++) { | ||
146 | - for (uint l = 0; l < tab2[k].size(); l++) { | ||
147 | - pfams2.insert(tab2[k][l]); | ||
148 | - } | ||
149 | - list_pfams2.insert(pfams); | ||
150 | - pfams2.clear(); | ||
151 | - }*/ | ||
152 | - | ||
153 | - /*set<set<string>>::iterator iit; | ||
154 | - set<string>::iterator iit2; | ||
155 | - for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) { | ||
156 | - for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) { | ||
157 | - cout << *iit2 << endl; | ||
158 | - } | ||
159 | - cout << endl << endl; | ||
160 | - }*/ | ||
161 | } else if (!test.compare("occurences")) { | 144 | } else if (!test.compare("occurences")) { |
162 | occurences2 = it4.value(); | 145 | occurences2 = it4.value(); |
163 | //cout << "occurences2: "<< occurences2 << endl; | 146 | //cout << "occurences2: "<< occurences2 << endl; |
... | @@ -216,7 +199,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { | ... | @@ -216,7 +199,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { |
216 | 199 | ||
217 | } | 200 | } |
218 | } | 201 | } |
219 | - //cout << "----end----" << endl; | ||
220 | //} | 202 | //} |
221 | } | 203 | } |
222 | if(flag) { | 204 | if(flag) { |
... | @@ -242,23 +224,12 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { | ... | @@ -242,23 +224,12 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { |
242 | //cout << endl;*/ | 224 | //cout << endl;*/ |
243 | } | 225 | } |
244 | 226 | ||
245 | - | ||
246 | - /*for(uint ii = 0; ii < list_pfams.size(); ii++) { | ||
247 | - for (uint jj = 0; jj < list_pfams[ii].size(); jj++) { | ||
248 | - cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl; | ||
249 | - } | ||
250 | - }*/ | ||
251 | 227 | ||
252 | new_id["occurences"] = list_pfams.size(); | 228 | new_id["occurences"] = list_pfams.size(); |
253 | - new_id["pfam"] = list_pfams; | 229 | + new_id["pfam"] = list_pfams; |
254 | - | ||
255 | - //cout << "-------ending---------" << endl; | ||
256 | new_motif[id] = new_id; | 230 | new_motif[id] = new_id; |
257 | new_id.clear(); | 231 | new_id.clear(); |
258 | - //cout << "valeur: " << ite << endl; | 232 | + |
259 | - /*for (uint i = 0; i < tab_struc.size() ; i++) { | ||
260 | - cout << "tab_struc[" << i << "]: " << tab_struc[i] << endl << endl; | ||
261 | - } */ | ||
262 | } | 233 | } |
263 | outfile << new_motif.dump(4) << endl; | 234 | outfile << new_motif.dump(4) << endl; |
264 | outfile.close(); | 235 | outfile.close(); |
... | @@ -267,13 +238,11 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { | ... | @@ -267,13 +238,11 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) { |
267 | 238 | ||
268 | int main() | 239 | int main() |
269 | { | 240 | { |
270 | - //183 | 241 | + |
271 | - //cout << "------------------BEGIN-----------------" << endl; | 242 | + string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_06-06-2021.json"; |
272 | - string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json"; | 243 | + string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_final.json"; |
273 | - string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; | ||
274 | counting_occurences(jsonfile, out); | 244 | counting_occurences(jsonfile, out); |
275 | 245 | ||
276 | - //cout << "------------------END-----------------" << endl; | ||
277 | return 0; | 246 | return 0; |
278 | } | 247 | } |
279 | 248 | ... | ... |
scripts/create_files.cpp
0 → 100644
1 | +#include <iostream> | ||
2 | +#include <sstream> | ||
3 | +#include <fstream> | ||
4 | +#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp" | ||
5 | +#include <typeinfo> | ||
6 | +#include <set> | ||
7 | +#include <algorithm> | ||
8 | +#include <cstdio> | ||
9 | +#include <vector> | ||
10 | + | ||
11 | +using namespace std; | ||
12 | +using json = nlohmann::json; | ||
13 | + | ||
14 | +/* | ||
15 | +Create a .fasta file for each of the sequence inside the benchmark in json format. | ||
16 | +Also create a .dbn and .txt file that list the name, sequence, 2d structure and contacts for all sequence in the benchmark file. | ||
17 | +Those files are useful for the Isaure_benchmark.py script. | ||
18 | +*/ | ||
19 | +void create_files(const string& jsonmotifs) { | ||
20 | + std::ifstream lib(jsonmotifs); | ||
21 | + string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/"; | ||
22 | + string list = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"; | ||
23 | + string dbn = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; | ||
24 | + std::ofstream outlist (list); | ||
25 | + std::ofstream outdbn (dbn); | ||
26 | + json js = json::parse(lib); | ||
27 | + uint count = 0; | ||
28 | + | ||
29 | + for (auto it = js.begin(); it != js.end(); ++it) { | ||
30 | + string id = it.key(); | ||
31 | + string name, seq, contacts, structure; | ||
32 | + for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { | ||
33 | + string chain = it2.key(); | ||
34 | + if (chain.compare("pfams") != 0) { | ||
35 | + string name = id + "_" + chain; | ||
36 | + string filename = fasta + name + ".fa"; | ||
37 | + std::ofstream outfasta (filename); | ||
38 | + outfasta << ">test_" << name << endl; | ||
39 | + for (auto it3 = js[id][chain].begin(); it3 != js[id][chain].end(); ++it3) { | ||
40 | + string field = it3.key(); | ||
41 | + if (!field.compare("sequence")) { | ||
42 | + seq = it3.value(); | ||
43 | + outfasta << seq.substr(0,seq.size()) << endl; | ||
44 | + outfasta.close(); | ||
45 | + | ||
46 | + } else if (!field.compare("contacts")) { | ||
47 | + contacts = it3.value(); | ||
48 | + | ||
49 | + } else if (!field.compare("struct2d")) { | ||
50 | + structure = it3.value(); | ||
51 | + } | ||
52 | + } | ||
53 | + if(seq.find('&') == string::npos) { | ||
54 | + outlist << ">test_" << name << endl; | ||
55 | + outdbn << "test_" << name << "." << endl; | ||
56 | + outlist << contacts << endl; | ||
57 | + outdbn << seq << endl; | ||
58 | + outdbn << structure << endl; | ||
59 | + outdbn << contacts << endl; | ||
60 | + outlist << seq << endl; | ||
61 | + outlist << structure << endl; | ||
62 | + count++; | ||
63 | + } | ||
64 | + } | ||
65 | + } | ||
66 | + } | ||
67 | + cout << count << " sequences en tout" << endl; | ||
68 | + lib.close(); | ||
69 | + outlist.close(); | ||
70 | + outdbn.close(); | ||
71 | +} | ||
72 | + | ||
73 | +int main() | ||
74 | +{ | ||
75 | + string path = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/"; | ||
76 | + string jsonbm = path + "modules/ISAURE/benchmark_16-07-2021.json"; | ||
77 | + create_files(jsonbm); | ||
78 | + | ||
79 | + return 0; | ||
80 | +} | ||
81 | + |
... | @@ -12,6 +12,10 @@ | ... | @@ -12,6 +12,10 @@ |
12 | using namespace std; | 12 | using namespace std; |
13 | using json = nlohmann::json; | 13 | using json = nlohmann::json; |
14 | 14 | ||
15 | +/* | ||
16 | +This script is used to create a new motif library without a motif that contains the same pdb as the sequence used as input for prediction | ||
17 | +with BiORSEO. | ||
18 | +*/ | ||
15 | void delete_redundant_pdb(const string& jsonlibrary, const string& name, const string& jsonoutfile) { | 19 | void delete_redundant_pdb(const string& jsonlibrary, const string& name, const string& jsonoutfile) { |
16 | std::ifstream lib(jsonlibrary); | 20 | std::ifstream lib(jsonlibrary); |
17 | 21 | ||
... | @@ -51,8 +55,8 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& name, const s | ... | @@ -51,8 +55,8 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& name, const s |
51 | 55 | ||
52 | int main(int argc, char** argv) | 56 | int main(int argc, char** argv) |
53 | { | 57 | { |
54 | - string jsonlibrary = "/local/local/BiorseoNath/data/modules/ISAURE/Motifs_version_initiale/motifs_final.json"; | 58 | + string jsonlibrary = "/local/local/BiorseoNath/data/modules/ISAURE/motifs_final.json"; |
55 | - string out = "/local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; | 59 | + string out = "/local/local/BiorseoNath/data/modules/ISAURE/bibliotheque_a_lire/motifs_final.json"; |
56 | string name = argv[1]; | 60 | string name = argv[1]; |
57 | delete_redundant_pdb(jsonlibrary, name, out); | 61 | delete_redundant_pdb(jsonlibrary, name, out); |
58 | return 0; | 62 | return 0; | ... | ... |
... | @@ -12,18 +12,23 @@ using namespace std; | ... | @@ -12,18 +12,23 @@ using namespace std; |
12 | using json = nlohmann::json; | 12 | using json = nlohmann::json; |
13 | 13 | ||
14 | /* | 14 | /* |
15 | -That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from. | 15 | +That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from (with the same pdb). |
16 | */ | 16 | */ |
17 | 17 | ||
18 | +//To store the pdb and the sequence in the benchmark file. Also stores the corresponding motif id and components based on this sequence. | ||
18 | struct data { | 19 | struct data { |
20 | + //the pdb code (in the name of the sequence) | ||
19 | string pdb; | 21 | string pdb; |
22 | + //the complete sequence with this pdb code | ||
20 | string seq_pdb; | 23 | string seq_pdb; |
24 | + //the id of the motif corresponding to this pdb in the library | ||
21 | string id; | 25 | string id; |
26 | + //the module sequence with the components of this motif with the above id | ||
22 | string cmp; | 27 | string cmp; |
23 | }; | 28 | }; |
24 | typedef struct data data; | 29 | typedef struct data data; |
25 | 30 | ||
26 | - | 31 | +//returns the list of pdb codes and the corresponding information from the benchmark file. |
27 | vector<data> get_list_pdb_benchmark(const string& benchmark) { | 32 | vector<data> get_list_pdb_benchmark(const string& benchmark) { |
28 | 33 | ||
29 | fstream bm(benchmark); | 34 | fstream bm(benchmark); |
... | @@ -57,6 +62,7 @@ string trim(string str) { | ... | @@ -57,6 +62,7 @@ string trim(string str) { |
57 | return str; | 62 | return str; |
58 | } | 63 | } |
59 | 64 | ||
65 | +//store the corresponding id and motif to the sequence from the benchmark file | ||
60 | data find_id_pattern(string& pdb_pattern, const string& benchmark) { | 66 | data find_id_pattern(string& pdb_pattern, const string& benchmark) { |
61 | vector<data> l = get_list_pdb_benchmark(benchmark); | 67 | vector<data> l = get_list_pdb_benchmark(benchmark); |
62 | int size = l.size(); | 68 | int size = l.size(); |
... | @@ -71,6 +77,8 @@ data find_id_pattern(string& pdb_pattern, const string& benchmark) { | ... | @@ -71,6 +77,8 @@ data find_id_pattern(string& pdb_pattern, const string& benchmark) { |
71 | return data(); | 77 | return data(); |
72 | } | 78 | } |
73 | 79 | ||
80 | +//Create an array of data ('association'), which consists of each pdb of the benchmark file | ||
81 | +// with the associated pattern from this sequence. | ||
74 | vector<data> find_id(const string& bibli, const string& benchmark) { | 82 | vector<data> find_id(const string& bibli, const string& benchmark) { |
75 | ifstream lib(bibli); | 83 | ifstream lib(bibli); |
76 | json js = json::parse(lib); | 84 | json js = json::parse(lib); |
... | @@ -112,6 +120,7 @@ vector<data> find_id(const string& bibli, const string& benchmark) { | ... | @@ -112,6 +120,7 @@ vector<data> find_id(const string& bibli, const string& benchmark) { |
112 | return association; | 120 | return association; |
113 | } | 121 | } |
114 | 122 | ||
123 | +//check if the motif is found matching with a complete sequence from a benchmark file. | ||
115 | bool does_it_match(const string& seq, const string& seq_motif) { | 124 | bool does_it_match(const string& seq, const string& seq_motif) { |
116 | size_t found = seq_motif.find("&"); | 125 | size_t found = seq_motif.find("&"); |
117 | size_t size = seq_motif.size(); | 126 | size_t size = seq_motif.size(); |
... | @@ -150,6 +159,7 @@ bool does_it_match(const string& seq, const string& seq_motif) { | ... | @@ -150,6 +159,7 @@ bool does_it_match(const string& seq, const string& seq_motif) { |
150 | return false; | 159 | return false; |
151 | } | 160 | } |
152 | 161 | ||
162 | +//return the list of motif ids that didn't match any complete sequence other than the one they came from. | ||
153 | vector<string> select_not_motif(const string& bibli, const string& benchmark) { | 163 | vector<string> select_not_motif(const string& bibli, const string& benchmark) { |
154 | vector<string> selection; | 164 | vector<string> selection; |
155 | vector<data> association = find_id(bibli, benchmark); | 165 | vector<data> association = find_id(bibli, benchmark); |
... | @@ -187,8 +197,8 @@ vector<string> select_not_motif(const string& bibli, const string& benchmark) { | ... | @@ -187,8 +197,8 @@ vector<string> select_not_motif(const string& bibli, const string& benchmark) { |
187 | 197 | ||
188 | int main() | 198 | int main() |
189 | { | 199 | { |
190 | - string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; | 200 | + string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_final.json"; |
191 | - string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; | 201 | + string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/benchmark.dbn"; |
192 | 202 | ||
193 | /*vector<data> v = get_list_pdb_benchmark(benchmark); | 203 | /*vector<data> v = get_list_pdb_benchmark(benchmark); |
194 | for (data d : v) { | 204 | for (data d : v) { | ... | ... |
-
Please register or login to post a comment