Nathalie BERNARD

Nettoyage des fichiers avec des commentaires en plus

Showing 62 changed files with 161 additions and 1757 deletions
This diff is collapsed. Click to expand it.
......@@ -65,9 +65,9 @@ Check the file [INSTALL.md](INSTALL.md) for installation instructions.
```
Usage: You must provide:
1) a FASTA input file with -i,
2) a module type with --rna3dmotifs, --carnaval or --3dmotifatlas
2) a module type with --rna3dmotifs, --carnaval, --3dmotifatlas or --contacts
3) one module placement method in { --patternmatch, --jar3d, --bayespairing }
4) one scoring function with --func A, B, C or D
4) one scoring function with --func A, B, C, D, E or F
If you are not using the Docker image:
5) --modules-path, --biorseo-dir and (--jar3d-exec or --bypdir)
......@@ -79,6 +79,7 @@ Options:
--rna3dmotifs Use DESC modules from Djelloul & Denise, 2008
--carnaval Use RIN modules from Reinharz & al, 2018
--3dmotifatlas Use the HL and IL loops from BGSU's 3D Motif Atlas (updated)
--contacts Use the library of motifs created from RNA sequences linked to proteins, provided by I. Chauvot de Beauchene of the LORIA laboratory
-p [ --patternmatch ] Use regular expressions to place modules in the sequence (requires --rna3dmotifs or --carnaval)
-j [ --jar3d ] Use JAR3D to place modules in the sequence (requires --3dmotifatlas)
-b [ --bayespairing ] Use BayesPairing2 to place modules in the sequence (requires --rna3dmotifs or --3dmotifatlas)
......@@ -123,5 +124,6 @@ The allowed module/placement-method/function combinations are:
--rna3dmotifs A. B. A. B. C. D.
--3dmotifatlas A. B. C. D. A. B. C. D.
--carnaval A. B.
--contacts E. F.
```
......
......@@ -381,7 +381,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
break;
case 'E':
// Fonction f1E
// Fonction f1E
for (const Component& c : insertion_sites_[i].comp) sum_k += c.k;
obj1 += IloNum(sum_k * insertion_sites_[i].contact_ * insertion_sites_[i].tx_occurrences_) * insertion_dv_[index_of_first_components[i]] ;
break;
......@@ -395,6 +395,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
}
}
//Stacking energy parameter matrix
double energy[7][7] = {
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
{0.0, 1.1, 2.1, 2.2, 1.4, 0.9, 0.6},
......@@ -408,7 +409,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
obj2 = IloExpr(env_);
switch (obj_function2_nbr_) {
case 'a':
// Define the MFE:
// Define the MFE (Minimum Free Energy):
for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) {
for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) {
if (get_xij_index(u, v) != rna_.get_RNA_length() * rna_.get_RNA_length() + 1) {
......@@ -429,7 +430,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
}
break;
}
//std::cout << "\n fin \n";
}
// Destructor: calls end() on the env_ member.
// NOTE(review): presumably this releases the solver environment and the
// objects created from it — confirm against the env_ type's documentation.
MOIP::~MOIP()
{
    env_.end();
}
......@@ -705,7 +705,6 @@ void MOIP::define_problem_constraints(string& source)
SecondaryStructure MOIP::solve_objective(int o, double min, double max)
{
//cout << endl << "BEGIN" << endl;
// Solves one of the objectives, under constraint that the other should be in [min, max]
if (min > max) {
......@@ -755,17 +754,11 @@ SecondaryStructure MOIP::solve_objective(int o, double min, double max)
}
// if (verbose_) cout << "\t\t>retrieving basepairs of the result secondary structure..." << endl;
//cout << "y(2,80): " << cplex_.getValue(y(u, v)) << endl;
for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++)
for (size_t v = u + 4; v < rna_.get_RNA_length(); v++)
if (allowed_basepair(u, v))
if (cplex_.getValue(y(u, v)) > 0.5) {
best_ss.set_basepair(u, v);
/*if (u == 5 && v == 26) {
cout << endl << "(" << u << "," << v << "): " << endl;
cout << best_ss.to_string() << endl;
cout << "(((...((((((((....))))))))(((.....((((((((....)))))))))))...((((((((....)))))))))))" << endl;
}*/
}
best_ss.sort(); // order the basepairs in the vector
......@@ -1159,7 +1152,6 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct)
}
}
//Temporaire--------------------------------------
//Check if the sequence is a rna sequence (ATGC) and replace T by U or remove modified nucleotide if necessary
string check_motif_sequence(string seq) {
......@@ -1184,9 +1176,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) {
stack<uint> accolades;
stack<uint> chevrons;
/*for(uint j = 0; j < v.size(); j++) {
cout << "composante: (" << v[j].pos.first << "," << v[j].pos.second << ")" << endl << endl;
}*/
uint count = 0;
uint debut = v[count].pos.first;
uint gap = 0;
......@@ -1194,12 +1183,10 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) {
for (uint i = 0; i < struc.size(); i++) {
if (struc[i] == '(') {
parentheses.push(i + debut + gap - count);
//cout << "i: " << i << " pos :" << parentheses.top() << endl;
} else if (struc[i] == ')') {
Link l;
l.nts.first = parentheses.top();
//cout << "top :" << parentheses.top() << endl;
l.nts.second = i + debut + gap - count;
vec.push_back(l);
parentheses.pop();
......@@ -1237,8 +1224,6 @@ vector<Link> search_pairing(string& struc, vector<Component>& v) {
} else if (struc[i] == '&') {
count ++;
gap += v[count].pos.first - v[count - 1].pos.second - 1;
//cout << "count: " << count << endl;
//cout << "gap : " << gap << endl;
}
}
return vec;
......@@ -1311,11 +1296,9 @@ vector<string> find_components(string sequence, string delimiter) {
subseq = seq.substr(0, fin);
seq = seq.substr(fin + 1);
list.push_back(subseq); // new component sequence
//std::cout << "subseq: " << subseq << endl;
}
if (!seq.empty()) {
list.push_back(seq);
//std::cout << "subseq: " << seq << endl;
}
return list;
}
......@@ -1324,15 +1307,11 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) {
vector<uint> positions;
string delimiter = "*";
uint debut;
/*cout << "vsize: " << v.size() << endl;
cout << "struc2dsize: " << struc2d.size() << endl;*/
for (uint i = 0; i < v.size(); i++) {
//cout << "[" << i << "]:" << endl;
debut = v[i].pos.first;
uint pos = struc2d[i].find(delimiter, 0);
while(pos != string::npos && pos <= struc2d[i].size())
{
//cout << "position: " << pos + debut << endl;
positions.push_back(pos + debut);
pos = struc2d[i].find(delimiter, pos+1);
}
......@@ -1340,8 +1319,6 @@ vector<uint> find_contacts(vector<string>& struc2d, vector<Component>& v) {
return positions;
}
//Temporaire--------------------------------------
void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pair<uint, char>> errors_id)
{
/*
......@@ -1373,8 +1350,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
string keys[5] = {"contacts", "occurences", "pdb", "sequence", "struct2d"};
uint it_errors = 0;
uint comp;
//uint max_occ = 0;
//uint max_n = 0;
uint occ = 0;
for(auto it = js.begin(); it != js.end(); ++it) {
......@@ -1385,10 +1360,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
// Check for known errors to ignore corresponding motifs
if (comp == errors_id[it_errors].first) {
while (comp == errors_id[it_errors].first) {
//cout << "id erreur: " << errors_id[it_errors].first << " " << errors_id[it_errors].second << endl;
/*if (contacts_id.compare("974") == 0) {
cout << "id erreur: " << errors_id[it_errors].second << endl;
}*/
it_errors ++;
}
continue;
......@@ -1396,7 +1367,6 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
for(auto it2 = js[contacts_id].begin(); it2 != js[contacts_id].end(); ++it2) {
field = it2.key();
//cout << "field: " << field << endl;
if (!field.compare(keys[0])) // This is the contacts field
{
contacts = it2.value();
......@@ -1406,25 +1376,17 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
else if (!field.compare(keys[1])) // This is the occurences field
{
occ = it2.value();
//max_occ = find_max_occurrences(filepath);
tx_occurrences = (double)occ; // / (double)max_occ;
//cout << "occ: " << tx_occurrences << endl;
}
else if (!field.compare(keys[2])) // This is the pdb field
{
vector<string> tab = it2.value();
pdbs = tab;
/*for (uint i = 0; i < pdbs.size(); i++) {
cout << "pdbs[" << i << "]: " << pdbs[i] << endl;
}*/
}
else if (!field.compare(keys[3])) // This is the sequence field
{
seq = check_motif_sequence(it2.value());
/*max_n = find_max_sequence(filepath);
tx_occurrences = (double)occ / (double)max_n - seq.size() + 1 ;*/
component_sequences = find_components(seq, "&");
}
else if (!field.compare(keys[4])) // This is the struct2D field
......@@ -1440,9 +1402,7 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
Motif temp_motif = Motif(v, contacts_id, nb_contacts, tx_occurrences);
temp_motif.links_ = search_pairing(struct2d, v);
//cout << "test" << endl;
temp_motif.pos_contacts = find_contacts(component_contacts, v);
//cout << "test2" << endl;
// Check if the motif can be inserted, checking the basepairs probabilities and theta
bool unprobable = false;
......
......@@ -275,8 +275,7 @@ char Motif::is_valid_RIN(const string& rinfile)
return (char) 0;
}
//temporaire---------------------------------------------------
//check that there are as many opening parentheses as closing ones
bool checkSecondaryStructure(string struc)
{
stack<uint> parentheses;
......@@ -332,6 +331,7 @@ bool checkSecondaryStructure(string struc)
return (parentheses.empty() && crochets.empty() && accolades.empty() && chevrons.empty());
}
//count the number of nucleotides in the motif sequence
size_t count_nucleotide(string& seq) {
size_t count = 0;
for(uint i = 0; i < seq.size(); i++) {
......@@ -343,6 +343,7 @@ size_t count_nucleotide(string& seq) {
return count;
}
//count the number of '&' in the motif sequence
size_t count_delimiter(string& seq) {
size_t count = 0;
for(uint i = 0; i < seq.size(); i++) {
......@@ -354,7 +355,6 @@ size_t count_delimiter(string& seq) {
return count;
}
//--------------------------------------------------------------
vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile)
{
// /!\ returns 0 if no errors
......@@ -458,7 +458,6 @@ vector<pair<uint,char>> Motif::is_valid_JSON(const string& jsonfile)
}
j++;
}
//std::cout << "no error!\n" << endl;
}
return errors_id;
}
......@@ -524,17 +523,9 @@ vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<stri
if (regex_search(rna, c)) {
if (vc.size() > 2) {
next_seqs = vector<string>(&vc[1], &vc[vc.size()]);
/*for (uint i = 0; i < next_seqs.size(); i++) {
std::cout << "next seq: " << next_seqs[i] << endl;
}
std::cout << endl;*/
}
else {
next_seqs = vector<string>(1, vc.back());
/*for (uint i = 0; i < next_seqs.size(); i++) {
std::cout << "next seq: " << next_seqs[i] << endl;
}
std::cout << endl;*/
}
uint j = 0;
// For every regexp match
......@@ -606,17 +597,9 @@ vector<vector<Component>> json_find_next_ones_in(string rna, uint offset, vector
if (regex_search(rna, c)) {
if (vc.size() > 2) {
next_seqs = vector<string>(&vc[1], &vc[vc.size()]);
/*for (uint i = 0; i < next_seqs.size(); i++) {
std::cout << "next seq: " << next_seqs[i] << endl;
}
std::cout << endl;*/
}
else {
next_seqs = vector<string>(1, vc.back());
/*for (uint i = 0; i < next_seqs.size(); i++) {
std::cout << "next seq: " << next_seqs[i] << endl;
}
std::cout << endl;*/
}
uint j = 0;
// For every regexp match
......
#include <iostream>
#include <sstream>
#include <fstream>
#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
#include <typeinfo>
#include <set>
#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;
using json = nlohmann::json;
//Concatenate the motives from jsonmotifs by adding the corresponding pdb from jsondssr
void add_pdb(const string& jsonmotifs, const string& jsondssr, const string& jsonoutfile) {
std::ifstream lib(jsonmotifs);
std::ifstream lib2(jsondssr);
std::ofstream outfile (jsonoutfile);
json new_motif;
json new_id;
json js = json::parse(lib);
json js2 = json::parse(lib2);
for (auto it = js.begin(); it != js.end(); ++it) {
string id = it.key();
string sequence, structure;
vector<string> list_pdbs;
vector<string> list_pdbs2;
bool is_added = true;
//cout << "id: " << id << endl;
for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
string test = it2.key();
if (!test.compare("sequence")) {
sequence = it2.value();
new_id[test] = it2.value();
} else if (!test.compare("struct2d")) {
structure = it2.value();
new_id[test] = it2.value();
} else {
new_id[test] = it2.value();
}
}
//cout << "-------begin---------" << endl;
for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) {
string id2 = it3.key();
string sequence2, structure2;
//cout << "id: " << id << " / id2: " << id2 << endl;
for (auto it4 = js2[id2].begin(); it4 != js2[id2].end(); ++it4) {
string chain = it4.key();
for (auto it5 = js2[id2][chain].begin(); it5 != js2[id2][chain].end(); ++it5) {
string test = it5.key();
if (!test.compare("sequence")) {
sequence2 = it5.value();
//cout << sequence2 << endl;
if (!sequence.compare(sequence2) && !structure.compare(structure2)) {
//cout << id2 << endl;
vector<string> tmp;
tmp.push_back(id2);
new_id["pdb"] = tmp;
}
} else if (!test.compare("2D ")) {
structure2 = it5.value();
//cout << structure2 << endl;
}
}
}
//cout << endl;*/
}
/*for(uint ii = 0; ii < list_pfams.size(); ii++) {
for (uint jj = 0; jj < list_pfams[ii].size(); jj++) {
cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl;
}
}*/
new_motif[id] = new_id;
new_id.clear();
//cout << "valeur: " << ite << endl;
/*for (uint i = 0; i < tab_struc.size() ; i++) {
cout << "tab_struc[" << i << "]: " << tab_struc[i] << endl << endl;
} */
}
outfile << new_motif.dump(4) << endl;
outfile.close();
}
// Entry point of the PDB-merging tool.
//
// Usage: <program> [motifs.json [dssr.json [output.json]]]
// Each argument is optional; a missing argument falls back to the historical
// hard-coded location below, so running the tool without arguments behaves as
// before.
int main(int argc, char* argv[])
{
    std::string jsonmotifs = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_beta.json";
    std::string jsondssr = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/dssr2.json";
    std::string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_fusion_beta.json";

    // Command-line overrides, in positional order.
    if (argc > 1) jsonmotifs = argv[1];
    if (argc > 2) jsondssr = argv[2];
    if (argc > 3) out = argv[3];

    add_pdb(jsonmotifs, jsondssr, out);
    return 0;
}
No preview for this file type
No preview for this file type
No preview for this file type
This diff is collapsed. Click to expand it.
No preview for this file type
#include <iostream>
#include <sstream>
#include <fstream>
#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
#include <typeinfo>
#include <set>
#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;
using json = nlohmann::json;
/*
vector<string> find_components(string sequence, string delimiter) {
vector<string> list;
string seq = sequence;
string subseq;
uint fin = 0;
while(seq.find(delimiter) != string::npos) {
fin = seq.find(delimiter);
subseq = seq.substr(0, fin);
seq = seq.substr(fin + 1);
list.push_back(subseq); // new component sequence
//std::cout << "subseq: " << subseq << endl;
}
if (!seq.empty()) {
list.push_back(seq);
//std::cout << "subseq: " << seq << endl;
}
return list;
}
string is_include(vector<string>& components, string sequence, vector<string>& contacts) {
string seq_contact = "";
vector<uint> positions;
uint count = 0;
uint debut = 0;
string str = components[0];
uint pos = sequence.find(str, 0);
debut = pos + components[0].size();
if (pos == 0) {
seq_contact += contacts[0];
} else if (pos <= sequence.size()) {
string gap = "";
for (uint i = 0; i < pos; i++) {
gap += ".";
}
seq_contact += gap + contacts[0];
}
while(pos <= sequence.size() && count < components.size() - 1)
{
string gap = "";
debut = pos + components[count].size();
count++;
str = components[count];
pos = sequence.find(str, pos + components[count-1].size());
for (uint i = debut; i < pos; i++) {
gap += ".";
}
seq_contact += gap + contacts[count];
}
if (count == components.size() - 1) {
string gap = "";
if (seq_contact.size() != sequence.size()) {
for (uint i = 0; i < sequence.size() - seq_contact.size(); i++) {
gap += ".";
}
}
seq_contact += gap;
return seq_contact;
}
return std::string();
}*/
/*
//Concatenate the contact field to the motives of the benchmark (which is obtained from the motives library)
string add_contact(const string& jsonbm, const string& jsonmotifs) {
std::ifstream lib(jsonbm);
std::ifstream lib2(jsonmotifs);
string bm2 = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.json";
std::ofstream outfile (bm2);
json new_motif;
json new_id;
json js = json::parse(lib);
json js2 = json::parse(lib2);
for (auto it = js.begin(); it != js.end(); ++it) {
string id = it.key();
string seq_bm;
string seq_contact;
for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
string test = it2.key();
//cout << "test: " << it2.key() << endl;
if (!test.compare("seq")) {
seq_bm = it2.value();
new_id[test] = it2.value();
} else {
new_id[test] = it2.value();
}
}
//cout << "-------begin---------" << endl;
for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) {
string id2 = it3.key();
vector<string> comp;
vector<string> strucs;
vector<string> list_pdbs;
bool flag = false;
//cout << "id: " << id << " / id2: " << id2 << endl;
for (auto it4 = js2[id2].begin(); it4 != js2[id2].end(); ++it4) {
string test = it4.key();
if (!test.compare("sequence")) {
string sequence = it4.value();
comp = find_components(sequence, "&");
//cout << id << " / " << id2 << endl;
} else if (!test.compare("contacts")) {
string struc2d = it4.value();
strucs = find_components(struc2d, "&");
} else if (!test.compare("pdb")) {
vector<string> tab = it4.value();
list_pdbs = tab;
if (find(list_pdbs.begin(), list_pdbs.end(), id) != list_pdbs.end()) {
flag = true;
}
}
}
if (flag) {
seq_contact = is_include(comp, seq_bm, strucs);
//cout << "id: " << id << " id2: " << id2 << " seq_contact: " << seq_contact << endl;
new_id["ctc"] = seq_contact;
}
}
new_motif[id] = new_id;
new_id.clear();
}
outfile << new_motif.dump(4) << endl;
outfile.close();
return bm2;
}*/
void create_benchmark(const string& jsonmotifs) {
std::ifstream lib(jsonmotifs);
string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/";
string list = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.txt";
string dbn = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
std::ofstream outlist (list);
std::ofstream outdbn (dbn);
json js = json::parse(lib);
uint count = 0;
for (auto it = js.begin(); it != js.end(); ++it) {
string id = it.key();
string name, seq, contacts, structure;
for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
string chain = it2.key();
if (chain.compare("pfams") != 0) {
string name = id + "_" + chain;
string filename = fasta + name + ".fa";
std::ofstream outfasta (filename);
outfasta << ">test_" << name << endl;
for (auto it3 = js[id][chain].begin(); it3 != js[id][chain].end(); ++it3) {
string field = it3.key();
if (!field.compare("sequence")) {
seq = it3.value();
outfasta << seq.substr(0,seq.size()) << endl;
outfasta.close();
} else if (!field.compare("contacts")) {
contacts = it3.value();
} else if (!field.compare("struct2d")) {
structure = it3.value();
}
}
if(seq.find('&') == string::npos) {
outlist << ">test_" << name << endl;
outdbn << "test_" << name << "." << endl;
outlist << contacts << endl;
outdbn << seq << endl;
outdbn << structure << endl;
outdbn << contacts << endl;
outlist << seq << endl;
outlist << structure << endl;
count++;
}
}
}
}
cout << count << " sequences en tout" << endl;
lib.close();
outlist.close();
outdbn.close();
}
// Entry point of the benchmark-generation tool.
//
// Usage: <program> [benchmark.json]
// Without an argument the historical hard-coded benchmark description is used,
// so running the tool without arguments behaves as before.
int main(int argc, char* argv[])
{
    std::string path = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/";
    //string jsonmotifs = path + "modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json";
    std::string jsonbm = path + "modules/ISAURE/Motifs_version_initiale/benchmark_16-07-2021.json";
    //string jsonbm2 = add_contact(jsonbm1, jsonmotifs);

    // Optional command-line override of the benchmark description.
    if (argc > 1) jsonbm = argv[1];

    create_benchmark(jsonbm);
    return 0;
}
No preview for this file type
No preview for this file type
>test_1JJ2
UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCGGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAACCCGGUUCGCCGCCACC
>test_1L9A
GACACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAUUUGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGUC
>test_1LNG
UCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUC
>test_1MFQ
GACACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGUC
>test_1SM1
CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG
>test_1U6P
GGCGGUACUAGUUGAGAAACUAGCUCUGUAUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
>test_1Y69
CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG
>test_1YHQ
UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC
>test_1YI2
UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC
>test_2V3C
GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
>test_2ZJQ
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
>test_2ZJR
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
>test_3ADB
GGCCGCCGCCACCGGGGUGGUCCCCGGGCCGGACUUCAGAUCCGGCGCGCCCCGAGUGGGGCGCGGGGUUCAAUUCCCCGCGGCGGCCGCCA
>test_3CUL
GGAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA
>test_3CUN
GAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA
>test_3DLL
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
>test_3HHN
UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUACCCAUUGCACUCCGGGUAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA
>test_3IVKA
UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA
>test_3IWN
CACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACGGCAUUGCACUCCGCCGUAGGUAGCGGGGUUACCGAUGG
>test_3KTW
AGAUAGUCGUGGGUUCCCUUUCUGGAGGGAGAGGGAAUUCCACGUUGACCGGGGGAACCGGCCAGGCCCGGAAGGGAGCAACCGUGCCCGGCUAU
>test_3MUM
GUCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG
>test_3MUR
GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG
>test_3NDB
GUCUCGUCCCGUGGGGCUCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCGGCGCUCACGGGGGUGCGGGAC
>test_3PIO
ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
>test_3PIP
ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
>test_3UCU
GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG
>test_3UD4
GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG
>test_3V7E
GGCUUAUCAAGAGAGGUGGAGGGACUGGCCCGAUGAAACCCGGCAACCACUAGUCUAGCGUCAGCUUCGGCUGACGCUAGGCUAGUGGUGCCAAUUCCUGCAGCGGAAACGUUGAAAGAUGAGCCA
>test_3W3S
GGGAGAGGUUGGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUCCCUCUCCCGCC
>test_4IO9
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
>test_4IOA
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
>test_4LCK
GGGUGCGAUGAGAAGAAGAGUAUUAAGGAUUUACUAUGAUUAGCGACUCUAGGAUAGUGAAAGCUAGAGGAUAGUAACCUUAAGAAGGCACUUCGAGCACCC
>test_4P3EA
GACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUU
>test_4P3EB
GACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUU
>test_4UYJ
GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGUAAGGGUUCCCACCCUCGGGCGUGCCUC
>test_4UYK
GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGAAGCCGCCUCUGUAAGGAGGCGGUGGAGGGUUCCCACCCUCGGGCGUGCCUC
>test_4W90
GCGCGCUUAAUCUGAAAUCAGAGCGGGGGACCCAUUGCACUCCGGGUUUUUCCCGUAAGGGGUGAAUCCUUUUUAGGUAGGGCGAAAGCCCGAAUCCGUCAGCUAACCUCGUAAGCGCGC
>test_4WF9
UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC
>test_4XCO
GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
>test_4YB1
GGGCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACCUCGGUAGGUAGCGGGGUUACCGAUG
>test_5DM7
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
>test_5JVGA
ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
>test_5M73
GGUGUCCGCACUAAGUUCGGCAUCAAUAUGGUGACCUCCCGGGAGCGGGGGACCACCAGGUUGCCUAAGGAGGGGUGAACCGGCCCAGGUCGGAAACGGAGCAGGUCAAAACUCCCGUGCUGAUCAGUAGUGGGAUCGCGCCUA
>test_5NRGA
UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC
This diff is collapsed. Click to expand it.
> JSON1000_extended
AAUAUCCGGGCGUUUAAUCCCGGGAUAAA
\ No newline at end of file
>test_3DLL
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
\ No newline at end of file
>test_1003_1005_110
CCGGGACCUCUAACCGGGUUCCCGGGCAGUCACUG
\ No newline at end of file
>test_927
CCCGUGCCCAUAGAGGACCACCCCAUCCAUGCCGAGCUGGGUUUGUGAAACACGCCAAUGAUCCGGACCGCAGGGUCCCAAGUCGGUCAGCGCGGG
>test_170
GCGGGAGUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCCUGUUCGAAUCAGCCCCCGCACCA
>test_768
CUAGUCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGUUCGAAGCCGAGGUGUUCUUCCCCCUCGCCGUG
>test_770
CUAGACGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGCCGAAGUUUCGAGGGUUUUUCUCUCGCCGUG
>test_266
CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
>test_267
CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
>test_766
AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG
>test_851
AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG
>test_948
CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG
>test_972
GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
>test_159
UUAAGGCGGCCACAGCAGUUGUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACGGCGUUCCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
>test_122
GGCGGCCACAGCGGUUUGAUCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCGGGCGUUCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
>test_264
CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
>test_265
CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
>test_109
GGCGGCCACAGCGGUAAGCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCAGCGUUCUUCUGGAGUGCGCGAGCCUCUGGAGUUCGCCGCCA
>test_968
GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
>test_962
AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
>test_62
GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
>test_1010
GAUGAGACGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGU
>test_1018
GGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUCCCCAC
>test_1028
GCCCGGAUAGCUCAGUCGGUAGAGCAUCAGACUUUUAAUCUGAGGGUCCAGGGUUCAAGUCCCUGUUCGGGC
>test_1034
UAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGU
>test_1035
GCCUAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGUCGAGGC
>test_147
AUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
>test_72
GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA
>test_968
GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
>test_962
AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
>test_62
GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
>test_927
CCCGUGCCCAUAGAGGACCACCCCAUCCAUGCCGAGCUGGGUUUGUGAAACACGCCAAUGAUCCGGACCGCAGGGUCCCAAGUCGGUCAGCGCGGG
.((((((.....(&(.&.(((((((&.(.....).&.)))).&&)))...)&).((&.....&.(((((....))))).&....))...)))))).
>test_170
GCGGGAGUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCCUGUUCGAAUCAGCCCCCGCACCA
(((((&(..(((.......[.....))).(((((.......))))).(((....)))...(&(..]....)&))&)))))....
>test_768
CUAGUCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGUUCGAAGCCGAGGUGUUCUUCCCCCUCGCCGUG
(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&&)...(&(((((&(..&..)&&)))))&)..)
>test_770
CUAGACGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGCCGAAGUUUCGAGGGUUUUUCUCUCGCCGUG
(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&&&((((&(..&..)&))))&)..)
>test_266
CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
...................................................................................................
>test_267
CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
...................................................................................................
>test_766
AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG
.............................................((((((....))))))
>test_851
AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG
.............................................((((((....))))))
>test_948
CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG
...........................(((((.....)))))
>test_972
GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
......(((((........))))..)................
\ No newline at end of file
>test_159
UUAAGGCGGCCACAGCAGUUGUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACGGCGUUCCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
..&&((((((....((&((&(&...((((((...(.....)...))))..))....)&))&))...(&&((((((.((....))))))))&)...)))))).
>test_122
GGCGGCCACAGCGGUUUGAUCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCGGGCGUUCCCUGGAGUGCGCGAGCCUCUGGGGUUCGCCGCCA
((((((....(((((&&(&.&..((((((...(.....)...))))..))....)&)))&&))...(&((((((.((....))))))))&)...)))))).
>test_264
CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC
...................................................................................................
>test_265
CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC
...................................................................................................
>test_109
GGCGGCCACAGCGGUAAGCCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCAACCAGCGUUCUUCUGGAGUGCGCGAGCCUCUGGAGUUCGCCGCCA
((((((....(((((&&(&..((((((...(.....)...))))..))....)&)))&))...(&&((((((.((....))))))))&)...)))))).
>test_968
GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
......(((((........))))..)................
>test_962
AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
.....(((((........))))..)................
>test_62
GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
(((((((((.((((((....))))..)))))))))))
>test_1010
GAUGAGACGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGU
..........((((((..((((....))))....))))))..(((..).)).......((((....))))..
>test_1018
GGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUCCCCAC
(((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))....
>test_1028
GCCCGGAUAGCUCAGUCGGUAGAGCAUCAGACUUUUAAUCUGAGGGUCCAGGGUUCAAGUCCCUGUUCGGGC
(((((((..((((........)))).(((((.(...).))))).....(((((.......))))))))))))
>test_1034
UAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGU
.......(((((.(((..(((.........)))..))).....(...((......)).).)))))
>test_1035
GCCUAAGACAGCGGGGAGGUUGGCUUAGAAGCAGCCAUCCUUUAAAGAGUGCGUAACAGCUCACCCGUCGAGGC
(((.......(((((.(((..(((.........)))..))).....(...((......)).).)))))...)))
>test_147
AUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
.....((((..(((.(((((((((....)))))....)))))))))))((((((((((....))))))))))
>test_72
GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA
(((((((................(((..((((.......))))...)))(((((.......))))))))))))....
>test_968
GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
......(((((........))))..)................
>test_962
AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG
.....(((((........))))..)................
>test_62
GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC
(((((((((.((((((....))))..)))))))))))
>test_1JJ2
...************.**.....*.*******.****..***.****************.......****.............*****..***...*****............*******..
UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCGGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAACCCGGUUCGCCGCCACC
...((((((....((((((((......((((((...(.....)...))))..))....)))))).))...(((((.....((((((.((....))))))))....)))))...))))))...
>test_1LNG
................************.....................................*****....***....................
UCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUC
..(.((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).)).)
>test_1U6P
.............................*****..............................................**..................*
GGCGGUACUAGUUGAGAAACUAGCUCUGUAUCUGGCGGACCCGUGGUGGAACUGUGAAGUUCGGAACACCCGGCCGCAACCCUGGGAGAGGUCCCAGGGUU
.((((..((((((....))))))..)))).....((((..(((.(((((((((....)))))....)))))))))))((((((((((....))))))))))
>test_1Y69
.........***................................................................**.........****...........................
CCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGG
(((((((((.....((.(((((....((((((...............)))..)))...)))))..))(((.......((.(((((....))))).)).......)))..)))))))))
>test_1YHQ
...***************......********.****..***.****************.......****............******..***...****.............*******..
UUAGGCGGCCACAGCGGUGGGGUUGCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGCCCACCAGCGUUCCAGGGAGUACUGGAGUGCGCGAGCCUCUGGGAAAUCCGGUUCGCCGCCACC
...((((((....((((((((......((((((...(.....)...))))..))....)))))).))...((.((.....((((((.((....))))))))....)).))...))))))...
>test_2V3C
..............************...........******.****.....**....*********...**********........***....
GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
((((..(((((.(((((((((....)))))))))..))))).....(((((.....(((.....(((....))).....)))..)))))..)))).
>test_2ZJQ
......****.**..............********..**.******.******.****..*............*******..***.....******.......*****.....******...
CACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGUU
.((((((((((.....((.(((((....(((((((...(.....)...))))..)))...)))))..))(((.......((.(((((....))))).)).......)))..)))))))))).
>test_3ADB
*............********.****...............****...*.....................**.**.....*..**....***
GGCCGCCGCCACCGGGGUGGUCCCCGGGCCGGACUUCAGAUCCGGCGCGCCCCGAGUGGGGCGCGGGGUUCAAUUCCCCGCGGCGGCCGCCA
(((((((((..((((((..[.))))))((((((.......))))))(((((((....)))))))((((..]....)))))))))))))....
>test_3CUL
.............................********.**................................................*...
GGAUGGCGAAAGCCAUUUCCGCAGGCCCCAUUGCACUCCGGGGUAUUGGCGUUAGGUGGUGGUACGAGGUUCGAAUCCUCGUACCGCAGCCA
(((((((....)))))).)(((..(((((..........)))))....)))...(((.(((((((((((.......))))))))))).))).
>test_3HHN
...............................................................**...********.**..........................................................
UCCAGUAGGAACACUAUACUACUGGAUAAUCAAAGACAAAUCUGCCCGAAGGGCUUGAGAACAUACCCAUUGCACUCCGGGUAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAACAAUAGUGA
((((((((...[[[[[[.))))))))...............[[[[[(...).(.((((((((((((((..........)))))))..((((.]]]]]))))((.((((......)))).)))))))))).]]]]]].
>test_3IWN
....................................................************.*******.....................
CACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACGGCAUUGCACUCCGCCGUAGGUAGCGGGGUUACCGAUGG
((((......((...(((.((....)).)))..[))...(((.((.(((((..((((..........))))))))).].)))))...)).)).
>test_3KTW
...............*************............................*.........*****...****.................
AGAUAGUCGUGGGUUCCCUUUCUGGAGGGAGAGGGAAUUCCACGUUGACCGGGGGAACCGGCCAGGCCCGGAAGGGAGCAACCGUGCCCGGCUAU
..(((..((.(((((((((((((....))))))))))).)).))....(.(((.....(((.....(((....))).....)))..))).).)))
>test_3MUM
....................................................***..*******..**.......................
GUCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUACCGAUGG
..(.((......((...((((((....))))))..[))...(((.((((((((..((..........))))))).]))))))...))...)
>test_3MUR
....................................................****.********.**.......................
GUCACGCACAGGGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACUCCGGUAGGUAGCGGGGUUAUCGAUGG
..(.((......((...((((((....))))))..[))...(((.((((((((...(..........).))))).]))))))...))...)
>test_3NDB
.................................*************..........................**....**********..*********.....................................
GUCUCGUCCCGUGGGGCUCGGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCGGCGCUCACGGGGGUGCGGGAC
((((((..(((((.(((.(((((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).))))))).).))))).....))))))
>test_3PIO
.....****.***.............********..**.******.*****..****..*............*******..***....*****.*.......****......******..
ACCCCCGUGCCCAUAGCACUGUGGAACCACCCCACCCCAUGCCGAACUGGGUCGUGAAACACAGCAGCGCCAAUGAUACUCGGACCGCAGGGUCCCGGAAAAGUCGGUCAGCGCGGGGGU
((((((((((.....((.(((((....((((((...............)))..)))...)))))..)).((.......((.(((((....))))).)).......))...))))))))))
>test_3V7E
........*........**............****...................................................*......................................*
GGCUUAUCAAGAGAGGUGGAGGGACUGGCCCGAUGAAACCCGGCAACCACUAGUCUAGCGUCAGCUUCGGCUGACGCUAGGCUAGUGGUGCCAAUUCCUGCAGCGGAAACGUUGAAAGAUGAGCCA
((((((((....(.(((...(((.[.[[)))......))))(((..(((((((((((((((((.(....).))))))))))))))))).)))...(]].](((((....)))))..))))))))).
>test_3W3S
...................**............................*...........................*....................
GGGAGAGGUUGGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUCCCUCUCCCGCC
((((((((..((.((((....))))))((((((.......))))).)((((.((((....)))).)))).(((((.......)))))))))))))...
>test_4UYJ
......*.............************.....................**...........*****.................***...................
GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGUAAGGGUUCCCACCCUCGGGCGUGCCUC
(((((..(((((((((...[[[[[.)))))....((((....]]]]].....))))))))...(((((((((..((..(((....)))..))..)))))))))..)))))
>test_4UYK
......*.............************.....................**...........****..........................................***...................
GGGGCUAGGCCGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGGGCCGAAGCCCGAGGGGCGGUUCCCGAAGCCGCCUCUGUAAGGAGGCGGUGGAGGGUUCCCACCCUCGGGCGUGCCUC
(((((..(((((((((...[[[[[.)))))....((((....]]]]].....))))))))...(((((((((..((..(((...(((((((((....)))))))))...)))..))..)))))))))..)))))
>test_4WF9
...****..**.............*****.....*...***...******.****.*.............*****.*..***....**..***.....***.......****..
UCUGGUGACUAUAGCAAGGAGGUCACACCUGUUCCCAUGCCGAACACAGAAGUUAAGGUCUUUAGCGACGAUGGUAGCCAACUUACGUUCCGCUAGAGUAGAACGUUGCCAGGC
.(..(..(.....((((.((......((((((...(.....)...))))..)).....)).)).))............(............)..............)..)..).
>test_4XCO
..............*************........**........................*******....********................
GGCGGUGGGGGAGCAUCUCCUGUAGGGGAGAUGUAACCCCCUUUACCUGCCGAACCCCGCCAGGCCCGGAAGGGAGCAACGGUAGGCAGGACGUCG
((((..(((((.(((((((((....)))))))))..)))))....((((((.....(((.....(((....))).....)))..)))))).)))).
>test_4YB1
****.............................................................................*.........
GGGCACGCACAGAGCAAACCAUUCGAAAGAGUGGGACGCAAAGCCUCCGGCCUAAACCAUUGCACCUCGGUAGGUAGCGGGGUUACCGAUG
...((((......((...((((((....))))))..[))...(((.(((((((...((..........)).)))).]))))))...)).))
{
"1": {
"occurences": 1,
"pdb": [
"1A1T"
],
"sequence": "GGACUAGCGGAGGCUAGUCC",
"struct2d": "((((((((....))))))))"
},
"10": {
"occurences": 1,
"pdb": [
"1AUD"
],
"sequence": "GGCAGAGUCCUUCGGGACAUUGCACCUG",
"struct2d": "(.(((.((((....)))).......)))"
},
"100": {
"occurences": 1,
"pdb": [
"1N38"
],
"sequence": "UUAGC",
"struct2d": "...))"
},
"1000": {
"occurences": 1,
"pdb": [
"4Z4C"
],
"sequence": "CAAUGUGAC",
"struct2d": "))))))))."
},
"1001": {
"occurences": 1,
"pdb": [
"4Z4D"
],
"sequence": "UUCACAUUGCCCAAGUCU&U",
"struct2d": ".((((((((.........&."
},
"1002": {
"occurences": 1,
"pdb": [
"4Z4I"
],
"sequence": "CAAUGUGA",
"struct2d": "))))))))"
},
"1003": {
"occurences": 1,
"pdb": [
"4Z4F"
],
"sequence": "UUCACAUUGCCCAAGU&U",
"struct2d": ".((((((((.......&."
},
"1004": {
"occurences": 1,
"pdb": [
"4Z7L"
],
"sequence": "GCAAAAUAACAAGC",
"struct2d": "((..........))"
},
"1005": {
"occurences": 1,
"pdb": [
"4ZDOB"
],
"sequence": "GCCCGGAUGAUCCUCAGUGGUCUGGGGUGCAG&ACCUGU&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCGG",
"struct2d": "(((((((.(..((((((..[.)))))).((((&..))))&((((.&.))))..((((..]....))))).)))))"
},
"1006": {
"occurences": 1,
"pdb": [
"4ZDPA"
],
"sequence": "GCCCGGAUGAU&CAGUGGUCUGGGGUGCAGG&ACCUGUA&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCG",
"struct2d": "(((((((.(..&(((..[.)))...((((((&.))))))&((((.&.))))..((((..]....))))).))))"
},
"1007": {
"occurences": 1,
"pdb": [
"4ZDPB"
],
"sequence": "GCCCGGAUGAU&CAGUGGUCUGGGGUGCAGG&ACCUGUA&UGUCU&CGACAGAGUGGUUCAAUUCCACCUUUCG",
"struct2d": "(((((((.(..&(((..[.)))...(((((.&..)))))&((((.&.))))..((((..]....))))).))))"
},
"1008": {
"occurences": 1,
"pdb": [
"4ZLD"
],
"sequence": "UAACUUCUGUGAAGUU",
"struct2d": ".((((((...))))))"
}
}
\ No newline at end of file
{
"1": {
"occurences": 3,
"pdb": [
"1A1T"
],
"pfam": [
[
"UNK13"
],
[
"PF00539",
"PF08652"
],
[
"PF00098"
]
],
"sequence": "ACUAGCGGAGGCUAGU",
"struct2d": "((((((....))))))"
},
"10006": {
"occurences": 2,
"pdb": [
"1MNB",
"2A9X"
],
"pfam": [
[
"PF00539",
"PF08652"
],
[
"UNK13"
]
],
"sequence": "UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA",
"struct2d": "(((.(((((((....))))))((((....))))))))"
}
}
{
"1":{
"occurences":2,
"pdb":[
"1A1T"
],
"pfam":[
[
"PF00539",
"PF08652"
],
[
"PF00098"
]
],
"sequence":"ACUAGCGGAGGCUAGU",
"struct2d":"((((((....))))))"
},
"10006":{
"occurences":2,
"pdb":[
"1MNB",
"2A9X"
],
"pfam":[
[
"PF00539",
"PF08652"
],
[
"UNK13"
]
],
"sequence":"UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA",
"struct2d":"(((.(((((((....))))))((((....))))))))"
}
}
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
{
"927": {
"contacts": "..****..**...&..&*****.**&**..*****&******&*******&....&*****&.***.....*******&.*****.....*****",
"occurences": 1,
"pdb": [
"5JVGA",
"5JVGB"
],
"pfam": [
"PF00181",
"PF00237",
"PF00238",
"PF00252",
"PF00276",
"PF00281",
"PF00297",
"PF00298",
"PF00327",
"PF00347",
"PF00453",
"PF00467",
"PF00468",
"PF00471",
"PF00572",
"PF00573",
"PF00673",
"PF00828",
"PF00829",
"PF00830",
"PF00831",
"PF00861",
"PF01016",
"PF01196",
"PF01245",
"PF01386",
"PF01632",
"PF01783",
"PF03947",
"PF14693",
"PF17136"
],
"sequence": "CCCGUGCCCAUAG&GG&CCACCCCA&CCAUGCCGA&CUGGGU&GUGAAAC&CGCC&AUGAU&CGGACCGCAGGGUCCC&AGUCGGUCAGCGCGGG",
"struct2d": ".((((((.....(&(.&.(((((((&.(.....).&.)))).&)))...)&).((&.....&.(((((....))))).&....))...))))))."
},
"170": {
"contacts": "*****&......***....**...****............*****.....******.....&.........&..&...*.****",
"occurences": 1,
"pdb": [
"1WZ2"
],
"pfam": [
"PF00133",
"PF08264"
],
"sequence": "GCGGG&GUUGCCGAGCCUGGUCAAAGGCGGGGGACUCAAGAUCCCCUCCCGUAGGGGUUCC&GUUCGAAUC&GC&CCCGCACCA",
"struct2d": "(((((&(..(((.......[.....))).(((((.......))))).(((....)))...(&(..]....)&))&)))))...."
},
"768": {
"contacts": "..*.&..........************.....................**..&.....&*****&...&***&.....&....",
"occurences": 1,
"pdb": [
"4UYJ"
],
"pfam": [
"PF02290",
"PF05486"
],
"sequence": "CUAG&CGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGG&CGAAG&CGAGG&GUU&UUC&CCUCG&CGUG",
"struct2d": "(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&(((((&(..&..)&)))))&)..)"
},
"770": {
"contacts": "..*.&..........************.....................**..&.....&****&...&***&....&....",
"occurences": 1,
"pdb": [
"4UYK"
],
"pfam": [
"PF02290",
"PF05486"
],
"sequence": "CUAG&CGGGGGGUUCGGCGUCCCCUGUAACCGGAAACCGCCGAUAUGCCGGG&CGAAG&CGAG&GUU&UUC&CUCG&CGUG",
"struct2d": "(..(&((((((...[[[[[.)))))....((((....]]]]].....)))))&)...(&((((&(..&..)&))))&)..)"
},
"266": {
"contacts": "***************************************************************************************************",
"occurences": 1,
"pdb": [
"2GTT"
],
"pfam": [
"PF00945"
],
"sequence": "CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC",
"struct2d": "..................................................................................................."
},
"267": {
"contacts": "***************************************************************************************************",
"occurences": 1,
"pdb": [
"2GTT"
],
"pfam": [
"PF00945"
],
"sequence": "CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC",
"struct2d": "..................................................................................................."
},
"766": {
"contacts": "***************************************************..********",
"occurences": 1,
"pdb": [
"4U7U"
],
"pfam": [
"PF08798",
"PF09344",
"PF09481",
"PF09485",
"PF09704"
],
"sequence": "AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG",
"struct2d": ".............................................((((((....))))))"
},
"851": {
"contacts": "***************************************************...*******",
"occurences": 1,
"pdb": [
"5CD4"
],
"pfam": [
"PF08798",
"PF09344",
"PF09481",
"PF09485",
"PF09704"
],
"sequence": "AUAAACCGACGGUAUUGUUCAGAUCCUGGCUUGCCAACAGGAGUUCCCCGCGCCAGCGGGG",
"struct2d": ".............................................((((((....))))))"
},
"948": {
"contacts": "******************************************",
"occurences": 1,
"pdb": [
"5O7H"
],
"pfam": [
"PF09618"
],
"sequence": "CUUAGAAAGCAAACCUAUAACCGUUCACCGCCGCACAGGCGG",
"struct2d": "...........................(((((.....)))))"
},
"972": {
"contacts": "******************************************",
"occurences": 1,
"pdb": [
"5WLH"
],
"pfam": [
"UNK81"
],
"sequence": "GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG",
"struct2d": "......(((((........))))..)................"
}
}
\ No newline at end of file
{
"159": {
"contacts": "..&************&**&.&******.*.****..***.****************&..&.****.&...*****...**...*****&....*******",
"occurences": 1,
"pdb": [
"1VQ6"
],
"pfam": [
"PF00181",
"PF00237",
"PF00238",
"PF00252",
"PF00276",
"PF00281",
"PF00297",
"PF00298",
"PF00327",
"PF00347",
"PF00466",
"PF00467",
"PF00572",
"PF00573",
"PF00673",
"PF00827",
"PF00828",
"PF00831",
"PF00832",
"PF00935",
"PF01157",
"PF01198",
"PF01246",
"PF01248",
"PF01280",
"PF01655",
"PF01780",
"PF01907",
"PF03947",
"PF16906",
"PF17144"
],
"sequence": "UU&GGCGGCCACAGC&GU&G&GCCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&AC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA",
"struct2d": "..&((((((....((&((&(&...((((((...(.....)...))))..))....)&))&))...(&((((((.((....))))))))&)...))))))."
},
"122": {
"contacts": "***************&.&*&*****.******..***.****************&...&.****.&...*****..***...*****&....*******",
"occurences": 1,
"pdb": [
"1Q81",
"1Q82",
"3CPW"
],
"pfam": [
"PF00181",
"PF00237",
"PF00238",
"PF00252",
"PF00276",
"PF00281",
"PF00297",
"PF00327",
"PF00347",
"PF00466",
"PF00467",
"PF00572",
"PF00573",
"PF00673",
"PF00827",
"PF00828",
"PF00831",
"PF00832",
"PF00935",
"PF01157",
"PF01198",
"PF01246",
"PF01248",
"PF01280",
"PF01655",
"PF01780",
"PF01907",
"PF03947",
"PF16906",
"PF17144"
],
"sequence": "GGCGGCCACAGCGGU&G&U&CCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&ACC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA",
"struct2d": "((((((....(((((&(&.&..((((((...(.....)...))))..))....)&)))&))...(&((((((.((....))))))))&)...))))))."
},
"264": {
"contacts": "***************************************************************************************************",
"occurences": 1,
"pdb": [
"2GTT"
],
"pfam": [
"PF00945"
],
"sequence": "CCCCCCCACCCACAAAAACCACAACACCCACAAACCCAAAAAACCCCACAACCCCCCCACACCCCACCAACCCCACAAACCCCACACACCCCACAAAAC",
"struct2d": "..................................................................................................."
},
"265": {
"contacts": "***************************************************************************************************",
"occurences": 1,
"pdb": [
"2GTT"
],
"pfam": [
"PF00945"
],
"sequence": "CCACCAACCCCACACACCCCCCCACCCCCACCCACCACACAAAACCCCCAAAACCCCCCCAACCCCCAAACCCCACCAACCCCACCAACCCCACAAACC",
"struct2d": "..................................................................................................."
},
"109": {
"contacts": "***************&.&************..***.****************&...&.****.&...*****...**...*****&....*******",
"occurences": 2,
"pdb": [
"1N8R",
"1W2B"
],
"pfam": [
"PF00181",
"PF00237",
"PF00238",
"PF00252",
"PF00276",
"PF00281",
"PF00297",
"PF00327",
"PF00347",
"PF00466",
"PF00467",
"PF00572",
"PF00573",
"PF00673",
"PF00827",
"PF00828",
"PF00831",
"PF00832",
"PF00935",
"PF01157",
"PF01198",
"PF01246",
"PF01248",
"PF01280",
"PF01655",
"PF01780",
"PF01907",
"PF03947",
"PF05697",
"PF16906",
"PF17144"
],
"sequence": "GGCGGCCACAGCGGU&G&CCUCCCGUACCCAUCCCGAACACGGAAGAUAAGC&ACC&GCGUUC&CUGGAGUGCGCGAGCCUCUGG&GUUCGCCGCCA",
"struct2d": "((((((....(((((&(&..((((((...(.....)...))))..))....)&)))&))...(&((((((.((....))))))))&)...))))))."
},
"762": {
"contacts": "***************************************************..********",
"occurences": 1,
"pdb": [
"4U7U"
],
"pfam": [
"PF08798",
"PF09344",
"PF09481",
"PF09485",
"PF09704"
],
"sequence": "AUAAACCGGGCUCCCUGUCGGUUGUAAUUGAUAAUGUUGAGAGUUCCCCGCGCCAGCGGGG",
"struct2d": ".............................................((((((....))))))"
},
"968": {
"contacts": "******************************************",
"occurences": 1,
"pdb": [
"5WLH"
],
"pfam": [
"UNK81"
],
"sequence": "GAAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG",
"struct2d": "......(((((........))))..)................"
},
"962": {
"contacts": "*****************************************",
"occurences": 2,
"pdb": [
"5W1H"
],
"pfam": [
"UNK75"
],
"sequence": "AAGAUAGCCCAAGAAAGAGGGCAAUAACCAGAUAUAGCCUG",
"struct2d": ".....(((((........))))..)................"
},
"62": {
"contacts": ".*****..********.......*****.**....**",
"occurences": 1,
"pdb": [
"1I6U"
],
"pfam": [
"PF00410"
],
"sequence": "GGGCCCGGUAAGUCUCUUCGGAGAUACUGCCGGGCCC",
"struct2d": "(((((((((.((((((....))))..)))))))))))"
}
}
This diff could not be displayed because it is too large.
{
"1": {
"occurences": 2,
"pdb": [
"1A1T"
],
"pfam": [
"PF00098"
],
"sequence": "ACUAGCGGAGGCUAGU",
"struct2d": "((((((....))))))"
},
"100006": {
"occurences": 2,
"pdb": [
"1MNB",
"2A9X"
],
"pfam": [
"PF00539",
"UNK13"
],
"sequence": "UCGUGACUAGCGGAGGCUAGUAGCUCAUUAGCUCCGA",
"struct2d": "(((.(((((((....))))))((((....))))))))"
},
"104": {
"occurences": 3,
"pdb": [
"1MNB",
"2A9X"
],
"pfam": [
"PF00539",
"UNK13"
],
"sequence": "UCGUG&AGCUCAUUAGCUCCGA",
"struct2d": "(((.(&((((....))))))))"
}
}
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
{
"103": {
"occurences": 1,
"sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
"struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"1103": {
"occurences": 1,
"sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
"struct2d": "(((((((..((((.....[..))))..(((.........)))......(((((..]....))))))))))))...."
},
"1104": {
"occurences": 1,
"sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
"struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"111": {
"occurences": 1,
"sequence": "GCCGAUAUAGCUCAGUUGGUAGAGCAGCGCAUUCGUAAUGCGAAGGUCGUAGGUUCGACUCCUAUUAUCGGCACCA",
"struct2d": "(((((((..((((.....[..))))..((((.......))))......(((((..]....))))))))))))...."
},
"141": {
"occurences": 1,
"sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
"struct2d": "(((((((..((((.....[..)))).((((.........)))).....(((((..]....))))))))))))...."
},
"16": {
"occurences": 1,
"sequence": "GGAGCGGUAGUUCAGUCGGUUAGAAUACCUGCCUGUCACGCAGGGGGUCGCGGGUUCGAGUCCCGUCCGUUCCGCCA",
"struct2d": "(((((((..((((.....[...)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"281": {
"occurences": 1,
"sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
"struct2d": ".((((((..((((......[..)))).(((((.......))))).....(((((..]....)))))))))))....."
},
"282": {
"occurences": 1,
"sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
"struct2d": ".((((((..((((.........)))).(((((.......))))).....(((((.......)))))))))))....."
},
"37": {
"occurences": 1,
"sequence": "GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA",
"struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"453": {
"occurences": 1,
"sequence": "GGACCGGUAGCCUAGCCAGGACAGGGCGGCGGCCUCCUAAGCCGCAGGUCCGGGGUUCAAAUCCCCGCCGGUCCGC",
"struct2d": "(((((((..((((......[...)))).(((((.......))))).....(((((..]....)))))))))))).."
},
"454": {
"occurences": 1,
"sequence": "GGGUCGCCAAGCCUGGCCAAAGGCGCUGGGCCUAGGACCCAGUCCCGUAGGGGUUCCAGGGUUCAAAUCCCUGCCCC",
"struct2d": "(((..(((.......[.....)))((((((.......))))))(((....)))...(.(((..]....))).))))."
},
"46": {
"occurences": 1,
"sequence": "UUCCUCGUGGCCCAAUGGUCACGGCGUCUGGCUGCGAACCAGAAGAUUCCAGGUUCAAGUCCUGGCGGGGAAGCCA",
"struct2d": "(((((((..(((.....[....))).(((((.......))))).....(((((..]....))))))))))))...."
},
"470": {
"occurences": 1,
"sequence": "GGGCCCGUAGCUUAGCCAGGUCAGAGCGCCCGGCUCAUAACCGGGCGGUCGAGGGUUCGAAUCCCUCCGGGCCCACCA",
"struct2d": "(((((((..((((......[...)))).((((.........)))).....(((((..]....))))))))))))...."
},
"536": {
"occurences": 2,
"sequence": "CUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAA",
"struct2d": "[[[[[(...).(.((((((((((((.....)))))..((((.]]]]]))))((.((((......)))).)))))))))"
},
"645": {
"occurences": 1,
"sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
"struct2d": "...((((....((.........))...(.(((.......))).).....(((((.......)))))))))......."
},
"671": {
"occurences": 1,
"sequence": "GGGAUGCGUAGGAUAGGUGGGAGCCUGUGAACCCCCGCCUCCGGGUGGGGGGGAGGCGCCGGUGAAAUACCACCCUUCCC",
"struct2d": "((((.(......[..(((((..(((......((((((((....))))))))...)))..........].)))))).))))"
},
"680": {
"occurences": 1,
"sequence": "GGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUC",
"struct2d": "((.((((..[.))))))((((((.......))))).)((((.((((....)))).)))).(((((..]....)))))"
},
"72": {
"occurences": 1,
"sequence": "GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA",
"struct2d": "(((((((................(((..((((.......))))...)))(((((.......))))))))))))...."
},
"955": {
"occurences": 1,
"sequence": "GGGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCCACC",
"struct2d": ".((((.((..((((........)))).(.((.........)).).....(((((.......))))))).))))...."
},
"985": {
"occurences": 1,
"sequence": "UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU",
"struct2d": ".............................................................................."
}
}
{
"103": {
"occurences": 1,
"pdb": [
"1TTT"
],
"sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
"struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"1103": {
"occurences": 1,
"pdb": [
"5HC9"
],
"sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
"struct2d": "(((((((..((((.....[..))))..(((.........)))......(((((..]....))))))))))))...."
},
"1104": {
"occurences": 1,
"pdb": [
"5HC9"
],
"sequence": "GGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCACCA",
"struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"111": {
"occurences": 1,
"pdb": [
"1QF6"
],
"sequence": "GCCGAUAUAGCUCAGUUGGUAGAGCAGCGCAUUCGUAAUGCGAAGGUCGUAGGUUCGACUCCUAUUAUCGGCACCA",
"struct2d": "(((((((..((((.....[..))))..((((.......))))......(((((..]....))))))))))))...."
},
"141": {
"occurences": 1,
"pdb": [
"1TTT"
],
"sequence": "GCGGAUUUAGCUCAGUUGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCACCA",
"struct2d": "(((((((..((((.....[..)))).((((.........)))).....(((((..]....))))))))))))...."
},
"16": {
"occurences": 1,
"pdb": [
"1C0A"
],
"sequence": "GGAGCGGUAGUUCAGUCGGUUAGAAUACCUGCCUGUCACGCAGGGGGUCGCGGGUUCGAGUCCCGUCCGUUCCGCCA",
"struct2d": "(((((((..((((.....[...)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"281": {
"occurences": 1,
"pdb": [
"2FMT"
],
"sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
"struct2d": ".((((((..((((......[..)))).(((((.......))))).....(((((..]....)))))))))))....."
},
"282": {
"occurences": 1,
"pdb": [
"2FMT"
],
"sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
"struct2d": ".((((((..((((.........)))).(((((.......))))).....(((((.......)))))))))))....."
},
"37": {
"occurences": 1,
"pdb": [
"1EIY"
],
"sequence": "GCCGAGGUAGCUCAGUUGGUAGAGCAUGCGACUGAAAAUCGCAGUGUCCGCGGUUCGAUUCCGCGCCUCGGCACCA",
"struct2d": "(((((((..((((.....[..)))).(((((.......))))).....(((((..]....))))))))))))...."
},
"453": {
"occurences": 1,
"pdb": [
"2ZUFB"
],
"sequence": "GGACCGGUAGCCUAGCCAGGACAGGGCGGCGGCCUCCUAAGCCGCAGGUCCGGGGUUCAAAUCCCCGCCGGUCCGC",
"struct2d": "(((((((..((((......[...)))).(((((.......))))).....(((((..]....)))))))))))).."
},
"454": {
"occurences": 1,
"pdb": [
"2ZZM"
],
"sequence": "GGGUCGCCAAGCCUGGCCAAAGGCGCUGGGCCUAGGACCCAGUCCCGUAGGGGUUCCAGGGUUCAAAUCCCUGCCCC",
"struct2d": "(((..(((.......[.....)))((((((.......))))))(((....)))...(.(((..]....))).))))."
},
"46": {
"occurences": 1,
"pdb": [
"1F7U"
],
"sequence": "UUCCUCGUGGCCCAAUGGUCACGGCGUCUGGCUGCGAACCAGAAGAUUCCAGGUUCAAGUCCUGGCGGGGAAGCCA",
"struct2d": "(((((((..(((.....[....))).(((((.......))))).....(((((..]....))))))))))))...."
},
"470": {
"occurences": 1,
"pdb": [
"3AMU"
],
"sequence": "GGGCCCGUAGCUUAGCCAGGUCAGAGCGCCCGGCUCAUAACCGGGCGGUCGAGGGUUCGAAUCCCUCCGGGCCCACCA",
"struct2d": "(((((((..((((......[...)))).((((.........)))).....(((((..]....))))))))))))...."
},
"536": {
"occurences": 2,
"pdb": [
"3IVKB"
],
"sequence": "CUGCCCGAAGGGCUUGAGAACAUCGAAACACGAUGCAGAGGUGGCAGCCUCCGGUGGGUUAAAACCCAACGUUCUCAA",
"struct2d": "[[[[[(...).(.((((((((((((.....)))))..((((.]]]]]))))((.((((......)))).)))))))))"
},
"645": {
"occurences": 1,
"pdb": [
"3QSY"
],
"sequence": "CGCGGGGUGGAGCAGCCUGGUAGCUCGUCGGGCUCAUAACCCGAAGAUCGUCGGUUCAAAUCCGGCCCCCGCAACCA",
"struct2d": "...((((....((.........))...(.(((.......))).).....(((((.......)))))))))......."
},
"671": {
"occurences": 1,
"pdb": [
"3UMY"
],
"sequence": "GGGAUGCGUAGGAUAGGUGGGAGCCUGUGAACCCCCGCCUCCGGGUGGGGGGGAGGCGCCGGUGAAAUACCACCCUUCCC",
"struct2d": "((((.(......[..(((((..(((......((((((((....))))))))...)))..........].)))))).))))"
},
"680": {
"occurences": 1,
"pdb": [
"3W3S"
],
"sequence": "GGCCGGCUGGUGCCGCCCCGGGACUUCAAAUCCCGUGGGAGGUCCCGCAAGGGAGCUCCGGAGGGUUCGAUUCCCUC",
"struct2d": "((.((((..[.))))))((((((.......))))).)((((.((((....)))).)))).(((((..]....)))))"
},
"72": {
"occurences": 1,
"pdb": [
"1J2B"
],
"sequence": "GGGCCCGUGGUCUAGUUGGUCAUGACGCCGCCCUUACGAGGCGGAGGUCCGGGGUUCAAGUCCCCGCGGGCCCACCA",
"struct2d": "(((((((................(((..((((.......))))...)))(((((.......))))))))))))...."
},
"955": {
"occurences": 1,
"pdb": [
"4X0B"
],
"sequence": "GGGCCAGGUAGCUCAGUUGGUAGAGCACUGGACUGAAAAUCCAGGUGUCGGCGGUUCGAUUCCGCCCCUGGCCCACC",
"struct2d": ".((((.((..((((........)))).(.((.........)).).....(((((.......))))))).))))...."
},
"985": {
"occurences": 1,
"pdb": [
"4XJN"
],
"sequence": "UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU",
"struct2d": ".............................................................................."
}
}
This diff could not be displayed because it is too large.
>test_CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE__PDB_00376
.......***.......................******..................................
GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
((((((..(((.........)))((((((((...))))))))...(((((.......))))))))))).....
>test_GUANINE_RIBOSWITCH_U22C,_A52G_MUTANT_BOUND_TO_HYPOXANTHINE__PDB_01023
............................*********.............................**
GGACAUACAAUCGCGUGGAUAUGGCACGCAAGUUUCUGCCGGGCACCGUAAAUGUCCGACUAUGUCCa
(((((((...(((((((.[[..[[)))))))........((((((]]...]]))))))..))))))).
>test_SOLUTION_STRUCTURE_OF_THE_P2B-P3_PSEUDOKNOT_FROM_HUMAN_TELOMERASE_RNA__PDB_00857
.............................*****.............
GGGCUGUUUUUCUCGCUGACUUUCAGCCCCAAACAAAAAAGUCAGCA
[[[[[[........(((((((((]]]]]]........))))))))).
The motif library used with --contacts is a special one. It was provided by Isaure Chauvot de Beauchêne from the LORIA
laboratory. Its motifs are made up of RNA fragments that are bound to proteins.
==================================================================================================================
Several versions of this motif library have been provided, but the most complete is the latest one: 'motifs_06-06-2021.json'.
The current scripts were written for this file and do not work with the older libraries.
There are also two benchmark files, likewise in JSON format: 'benchmark_16-06-2021.json' and 'benchmark_16-07-2021.json'.
They contain complete RNA sequences that bind to a protein; the first one contains only 33 RNAs, and the second one
contains 130 RNAs.
The benchmark.dbn and benchmark.txt files were created from 'benchmark_16-07-2021.json'.
They are mostly used by the Isaure_benchmark.py script and by the scripts in the 'scripts' directory.
The motifs_final.json file is obtained by running the count_pattern.cpp program from the 'script' directory on
the 'motifs_06-06-2021.json' motifs file.
This program counts the number of "occurrences" of each motif. We consider that if the sequence of motif A
is included in motif B, then for each inclusion of B we also have an inclusion of A. And vice versa.
The motif library used by BiORSEO is the one in the 'bibliotheque_a_lire' directory. This directory should contain only
the JSON file that BiORSEO is meant to use for its prediction. That is why you should not put any other type of file there!
......@@ -22341,23 +22341,6 @@
"sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA",
"struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...."
},
"977": {
"contacts": "******************..&****************.**************&*************&*",
"occurences": 1,
"pdb": [
"5XBL"
],
"pfam": [
[
"PF16592",
"PF16593",
"PF16595",
"PF13395"
]
],
"sequence": "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U",
"struct2d": "..........(((((...((&)).....)))))..(.(..)..).......(&).....(.....)&."
},
"978": {
"contacts": "*****",
"occurences": 9,
......
......@@ -22341,23 +22341,6 @@
"sequence": "G&C&GGGUAG&UAGGGGCUA&CAG&CCUUACGUGGGUUCAAAUCCCACCCC&G&CUCCA",
"struct2d": "(&(&(((..(&(.......)&).(&).....(((((.......))))))))&)&)...."
},
"977": {
"contacts": "******************..&****************.**************&*************&*",
"occurences": 1,
"pdb": [
"5XBL"
],
"pfam": [
[
"PF16592",
"PF16593",
"PF16595",
"PF13395"
]
],
"sequence": "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U",
"struct2d": "..........(((((...((&)).....)))))..(.(..)..).......(&).....(.....)&."
},
"978": {
"contacts": "*****",
"occurences": 9,
......
......@@ -6,6 +6,9 @@ import seaborn as sns
import pandas as pd
import matplotlib.pylab as plt
# Retrieve, for each RNA, the best MEA value and compare this energy value with the one obtained with
# RNAeval and RNAfold from the ViennaRNA Package 2.0 (Ronny Lorenz et al., 2011).
# Once those values are collected, a figure is created.
def get_result_MEA(filename):
ext = "json_pmE"
file2 = open( "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/" + filename + ext, "r")
......
from math import sqrt, ceil
import numpy as np
import matplotlib.pyplot as plt
file = open("/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn", "r")
name = file.readline()
rna = file.readline()
twod = file.readline()
contacts = file.readline()
length = len(rna)
nb_ctc = contacts.count('*')
print("--------------------------------------------------------")
ctc_max = nb_ctc
ctc_min = nb_ctc
np_lgt = []
np_lgt.append(length)
np_ctc = []
np_ctc.append(nb_ctc)
np = []
np.append([length, nb_ctc])
while name:
print(contacts)
print(length)
print(nb_ctc)
print("--------------------------------------------------------")
name = file.readline()
rna = file.readline()
length = len(rna)
if length != 0 :
np_lgt.append(length)
twod = file.readline()
contacts = file.readline()
nb_ctc = contacts.count('*')
if nb_ctc != 0:
np_ctc.append(nb_ctc)
np.append([length, nb_ctc])
if nb_ctc > ctc_max:
ctc_max = nb_ctc
if nb_ctc < ctc_min and nb_ctc != 0:
ctc_min = nb_ctc
file.close()
print(np_lgt)
print(np_ctc)
print(np)
x = np_lgt
y = np_ctc
index = np_ctc.index(ctc_max)
index2 = np_ctc.index(ctc_min)
plt.scatter(x, y, c = 'blue')
plt.annotate("(" + str(np_lgt[index]) + "," + str(ctc_max) + ")", (np_lgt[index], ctc_max),c ='red')
plt.scatter(np_lgt[index], ctc_max,c = 'red')
plt.annotate("(" + str(np_lgt[index2]) + "," + str(ctc_min) + ")", (np_lgt[index2], ctc_min),c ='green')
plt.scatter(np_lgt[index2], ctc_min,c = 'green')
plt.xlabel('longeur de l\'arn')
plt.ylabel('nombre de contacts')
plt.savefig('stats.png')
......@@ -11,6 +11,7 @@
using namespace std;
using json = nlohmann::json;
//Count the number of '&' in the motif sequence
size_t count_delimiter(string& seq) {
size_t count = 0;
for(uint i = 0; i < seq.size(); i++) {
......@@ -22,6 +23,10 @@ size_t count_delimiter(string& seq) {
return count;
}
/*
If there is a '&' in the motif sequence in the field 'sequence' but not in the field 'contacts',
the script puts a '&' in the field 'contacts' at the same position as in the field 'sequence'.
*/
void add_delimiter(const string& jsonfile, const string& jsonoutfile) {
std::ifstream lib(jsonfile);
......@@ -77,13 +82,9 @@ void add_delimiter(const string& jsonfile, const string& jsonoutfile) {
int main()
{
//183
//cout << "------------------BEGIN-----------------" << endl;
string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json";
string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_tmp.json";
string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_06-06-2021.json";
string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_tmp.json";
add_delimiter(jsonfile, out);
//cout << "------------------END-----------------" << endl;
return 0;
}
......
......@@ -11,6 +11,12 @@
using namespace std;
using json = nlohmann::json;
/*
This script counts the number of "occurrences" of each motif.
So we consider that if the sequence of pattern A is included in pattern B,
then for each inclusion of B we also have an inclusion of A. And vice versa.
*/
//Return true if the first sequence seq1 is included in the second sequence seq2
//if not return false
int is_contains(string& seq1, string& seq2) {
......@@ -38,6 +44,8 @@ int is_contains(string& seq1, string& seq2) {
//If we find the sequence and structure of pattern A in pattern B, we have to concatenate the pfam lists of A and B,
//remove the duplicates, assign this new list of pfam lists to A, and assign as occurrence to A the size of this list.
//The pattern A is counted only once in every other pattern, i.e. even if the sequence of A is found several times in B,
// it will be added only once in the occurrences of A.
void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
std::ifstream lib(jsonfile);
std::ifstream lib2(jsonfile);
......@@ -73,14 +81,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
if (!test.compare("pfam")) {
vector<vector<string>> tab = it2.value();
list_pfams = tab;
/*set<set<string>>::iterator iit;
set<string>::iterator iit2;
for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) {
for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) {
cout << *iit2 << endl;
}
cout << endl << endl;
}*/
} else if (!test.compare("sequence")) {
//cout << "sequence: " << it2.value() << endl;
sequence = it2.value();
......@@ -124,7 +124,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
new_id[test] = it2.value();
}
}
//cout << "-------begin---------" << endl;
for (auto it3 = js2.begin(); it3 != js2.end(); ++it3) {
string id2 = it3.key();
......@@ -142,22 +141,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
if (!test.compare("pfam")) {
vector<vector<string>> tab = it4.value();
list_pfams2 = tab;
/*for (uint k = 0; k < tab2.size(); k++) {
for (uint l = 0; l < tab2[k].size(); l++) {
pfams2.insert(tab2[k][l]);
}
list_pfams2.insert(pfams);
pfams2.clear();
}*/
/*set<set<string>>::iterator iit;
set<string>::iterator iit2;
for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) {
for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) {
cout << *iit2 << endl;
}
cout << endl << endl;
}*/
} else if (!test.compare("occurences")) {
occurences2 = it4.value();
//cout << "occurences2: "<< occurences2 << endl;
......@@ -216,7 +199,6 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
}
}
//cout << "----end----" << endl;
//}
}
if(flag) {
......@@ -242,23 +224,12 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
//cout << endl;*/
}
/*for(uint ii = 0; ii < list_pfams.size(); ii++) {
for (uint jj = 0; jj < list_pfams[ii].size(); jj++) {
cout << "[" << ii << "][" << jj << "]: " << list_pfams[ii][jj] << endl;
}
}*/
new_id["occurences"] = list_pfams.size();
new_id["pfam"] = list_pfams;
//cout << "-------ending---------" << endl;
new_id["pfam"] = list_pfams;
new_motif[id] = new_id;
new_id.clear();
//cout << "valeur: " << ite << endl;
/*for (uint i = 0; i < tab_struc.size() ; i++) {
cout << "tab_struc[" << i << "]: " << tab_struc[i] << endl << endl;
} */
}
outfile << new_motif.dump(4) << endl;
outfile.close();
......@@ -267,13 +238,11 @@ void counting_occurences(const string& jsonfile, const string& jsonoutfile) {
int main()
{
//183
//cout << "------------------BEGIN-----------------" << endl;
string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_06-06-2021.json";
string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
string jsonfile = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_06-06-2021.json";
string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_final.json";
counting_occurences(jsonfile, out);
//cout << "------------------END-----------------" << endl;
return 0;
}
......
#include <iostream>
#include <sstream>
#include <fstream>
#include "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/cppsrc/json.hpp"
#include <typeinfo>
#include <set>
#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;
using json = nlohmann::json;
/*
Create a .fasta file for each of the sequence inside the benchmark in json format.
Also create a .dbn and .txt file that list the name, sequence, 2d structure and contacts for all sequence in the benchmark file.
Those files are useful for the Isaure_benchmark.py script.
*/
void create_files(const string& jsonmotifs) {
std::ifstream lib(jsonmotifs);
string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/";
string list = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.txt";
string dbn = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
std::ofstream outlist (list);
std::ofstream outdbn (dbn);
json js = json::parse(lib);
uint count = 0;
for (auto it = js.begin(); it != js.end(); ++it) {
string id = it.key();
string name, seq, contacts, structure;
for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
string chain = it2.key();
if (chain.compare("pfams") != 0) {
string name = id + "_" + chain;
string filename = fasta + name + ".fa";
std::ofstream outfasta (filename);
outfasta << ">test_" << name << endl;
for (auto it3 = js[id][chain].begin(); it3 != js[id][chain].end(); ++it3) {
string field = it3.key();
if (!field.compare("sequence")) {
seq = it3.value();
outfasta << seq.substr(0,seq.size()) << endl;
outfasta.close();
} else if (!field.compare("contacts")) {
contacts = it3.value();
} else if (!field.compare("struct2d")) {
structure = it3.value();
}
}
if(seq.find('&') == string::npos) {
outlist << ">test_" << name << endl;
outdbn << "test_" << name << "." << endl;
outlist << contacts << endl;
outdbn << seq << endl;
outdbn << structure << endl;
outdbn << contacts << endl;
outlist << seq << endl;
outlist << structure << endl;
count++;
}
}
}
}
cout << count << " sequences en tout" << endl;
lib.close();
outlist.close();
outdbn.close();
}
int main()
{
string path = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/";
string jsonbm = path + "modules/ISAURE/benchmark_16-07-2021.json";
create_files(jsonbm);
return 0;
}
......@@ -12,6 +12,10 @@
using namespace std;
using json = nlohmann::json;
/*
This script is used to create a new motif library that excludes any motif sharing its pdb with the sequence given as input
to BiORSEO for the prediction.
*/
void delete_redundant_pdb(const string& jsonlibrary, const string& name, const string& jsonoutfile) {
std::ifstream lib(jsonlibrary);
......@@ -51,8 +55,8 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& name, const s
int main(int argc, char** argv)
{
string jsonlibrary = "/local/local/BiorseoNath/data/modules/ISAURE/Motifs_version_initiale/motifs_final.json";
string out = "/local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
string jsonlibrary = "/local/local/BiorseoNath/data/modules/ISAURE/motifs_final.json";
string out = "/local/local/BiorseoNath/data/modules/ISAURE/bibliotheque_a_lire/motifs_final.json";
string name = argv[1];
delete_redundant_pdb(jsonlibrary, name, out);
return 0;
......
......@@ -12,18 +12,23 @@ using namespace std;
using json = nlohmann::json;
/*
That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from.
That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from (with the same pdb).
*/
//To store the pdb and the sequence in the benchmark file. Also stor the corresponding motif id and components based on this sequence.
struct data {
//the pdb code (in the name of the sequence)
string pdb;
//the complete sequence with this pdb code
string seq_pdb;
//the id of the motif corresponding to this pdb in the library
string id;
//the module sequence with the components of this motif with the above id
string cmp;
};
typedef struct data data;
//returns the list of pdb codes and the corresponding information from the benchmark file.
vector<data> get_list_pdb_benchmark(const string& benchmark) {
fstream bm(benchmark);
......@@ -57,6 +62,7 @@ string trim(string str) {
return str;
}
//store the corresponding id and motif to the sequence from the benchmark file
data find_id_pattern(string& pdb_pattern, const string& benchmark) {
vector<data> l = get_list_pdb_benchmark(benchmark);
int size = l.size();
......@@ -71,6 +77,8 @@ data find_id_pattern(string& pdb_pattern, const string& benchmark) {
return data();
}
//Create an array of data ('association'), which consists of each pdb of the benchmark file
// with the associated pattern from this sequence.
vector<data> find_id(const string& bibli, const string& benchmark) {
ifstream lib(bibli);
json js = json::parse(lib);
......@@ -112,6 +120,7 @@ vector<data> find_id(const string& bibli, const string& benchmark) {
return association;
}
//check if the motif is found matching with a complete sequence from a benchmark file.
bool does_it_match(const string& seq, const string& seq_motif) {
size_t found = seq_motif.find("&");
size_t size = seq_motif.size();
......@@ -150,6 +159,7 @@ bool does_it_match(const string& seq, const string& seq_motif) {
return false;
}
//return the list of motif id that didn't match with any other complete sequence than the one which it came from.
vector<string> select_not_motif(const string& bibli, const string& benchmark) {
vector<string> selection;
vector<data> association = find_id(bibli, benchmark);
......@@ -187,8 +197,8 @@ vector<string> select_not_motif(const string& bibli, const string& benchmark) {
int main()
{
string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/motifs_final.json";
string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/benchmark.dbn";
/*vector<data> v = get_list_pdb_benchmark(benchmark);
for (data d : v) {
......