// Motif.h
#ifndef MOTIF_H_
#define MOTIF_H_
#include <boost/filesystem.hpp>
#include <mutex>
#include <string>
#include <vector>
#include <filesystem>
#include "rna.h"
using boost::filesystem::path;
using std::pair;
using std::string;
using std::vector;
using std::mutex;
/**
 * One contiguous stretch of positions belonging to a motif.
 *
 * `pos` holds the first and last position of the stretch (both inclusive) and
 * `k` holds how many positions it spans, i.e. `pos.second - pos.first + 1`.
 * Neither constructor touches `seq_`, so it starts out as an empty string.
 */
struct Comp_ {
    pair<uint, uint> pos;   // (first, last) positions, inclusive
    size_t           k;     // number of positions covered by the component
    string           seq_;  // presumably the component's sequence -- filled in by other code, confirm there

    /// Builds a component from an inclusive (first, last) pair; the length is derived from it.
    /// Intentionally left implicit so that a pair converts to a Component as before.
    Comp_(pair<int, int> p) : pos(p), k(1 + pos.second - pos.first) {}

    /// Builds a component from its first position and its length; the last
    /// position is `start + length - 1`.
    Comp_(uint start, uint length) : pos(start, start + length - 1), k(length) {}
};
using Component = Comp_;
/**
 * A pair of positions that are linked together inside a motif.
 * Plain aggregate: no constructor, so members are only set by whoever builds it.
 */
struct Link {
    pair<uint, uint> nts;         // the two linked positions (presumably nucleotide indices -- confirm with build_motif_pairs)
    bool             long_range;  // flag set by the producer of the link; exact criterion is not visible in this header
};
/**
 * A motif: a list of Components plus the bookkeeping attached to it
 * (links, contact positions, scores) and the identifier of the database
 * entry it comes from.
 *
 * Only declarations live here; every member function is defined in the
 * corresponding implementation file.
 */
class Motif
{
    public:
    // ---- Construction -------------------------------------------------------

    Motif();

    // From one line of CSV text (presumably a line of the file read by load_csv -- confirm).
    Motif(string csv_line);

    // From components and a PDB identifier.
    Motif(const vector<Component>& v, string PDB);

    // From components, an identifier, a contact count and an occurrence rate.
    Motif(const vector<Component>& v, string id, size_t contacts, double tx_occurrences);

    // From components, a RIN file path, a numeric id and a "reversed" flag.
    Motif(const vector<Component>& v, path rinfile, uint id, bool reversed);

    // `path` is the full path to biorseo/data/modules/RIN/Subfiles/
    Motif(string path, int id);

    // ---- File validation ----------------------------------------------------
    // The meaning of the returned codes is defined by the implementation, not here.

    static char                     is_valid_RIN(const string& rinfile);
    static char                     is_valid_DESC(const string& descfile);
    static vector<pair<uint, char>> is_valid_JSON(const string& jsonfile);

    // ---- Read-only accessors ------------------------------------------------

    string pos_string() const;
    string sec_struct() const;
    string get_origin() const;
    string get_identifier() const;

    // ---- Public data --------------------------------------------------------

    vector<Component> comp;             // the components making up the motif
    vector<Link>      links_;           // links between positions of the motif
    vector<uint>      pos_contacts;     // positions flagged as contacts (see find_contacts)
    size_t            contact_;         // contact count (presumably what the `contacts` ctor argument sets -- confirm)
    double            tx_occurrences_;  // occurrence rate (presumably what the `tx_occurrences` ctor argument sets -- confirm)
    double            score_;           // score attached to the motif; producer not visible here
    bool              reversed_;        // presumably mirrors the `reversed` ctor argument -- confirm

    private:
    // Exactly one of the following identifiers is meaningful, depending on source_.
    string carnaval_id;  // used when source_ == CARNAVAL
    string atlas_id;     // used when source_ == RNAMOTIFATLAS
    string PDBID;        // used when source_ == RNA3DMOTIF
    string contacts_id;  // used when source_ == CONTACTS

    // True for a model, false for a module extracted from a 3D structure.
    bool is_model_;

    // Database the motif was loaded from.
    enum {
        RNA3DMOTIF    = 1,
        RNAMOTIFATLAS = 2,
        CARNAVAL      = 3,
        CONTACTS      = 4
    } source_;
};
// ---- Motif loading (declarations only; definitions live in the implementation file) ----

// Insertibility checks: each takes the path of one motif file and an RNA string and
// returns a yes/no answer. Presumably "can this motif be placed on this RNA" -- the
// exact criterion is in the implementation, confirm there.
bool is_desc_insertible(const string& descfile, const string& rna);
bool is_rin_insertible(const string& rinfile, const string& rna);
bool is_json_insertible(const string& jsonfile, const string& rna);
// Folder/file loaders: each returns the motifs it read as a vector<Motif>.
// `path` is the location to read from, `rna` the RNA string passed through to the
// loader, `verbose` a flag (presumably enabling diagnostic output -- confirm).
vector<Motif> load_txt_folder(const string& path, const string& rna, bool verbose);
vector<Motif> load_desc_folder(const string& path, const string& rna, bool verbose);
// CSV loader: takes only a path, no RNA string.
vector<Motif> load_csv(const string& path);
vector<Motif> load_json_folder(const string& path, const string& rna, bool verbose);
// Search helpers: given an RNA string (taken by value), a starting `offset` and a
// list of strings `vc` (taken by non-const reference), return a list of Component lists.
// Presumably each inner list is one placement of the strings in `vc` on `rna` -- confirm.
vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<string>& vc);
// Variant of find_next_ones_in with the same signature, presumably used for JSON motifs (by name).
vector<vector<Component>> json_find_next_ones_in(string rna, uint offset, vector<string>& vc);
// ---- Utilities for JSON motifs ----
// Declarations only. Parameters are unnamed in this header, so what each argument
// carries must be read from the definitions; the notes below only restate what the
// signatures and names show.

// Counting helpers: take a string by non-const reference and return a count.
size_t count_nucleotide(string&);
size_t count_delimiter(string&);
size_t count_contacts(string&);
// Takes a string by value and returns a string (presumably a checked/cleaned sequence -- confirm).
string check_motif_sequence(string);
// Takes a string by value and returns a yes/no verdict on a secondary structure.
bool checkSecondaryStructure(string);
// Produces Link objects from a string and a list of Components, both passed by non-const reference.
vector<Link> build_motif_pairs(string&, vector<Component>&);
// Take a string by non-const reference and return an unsigned maximum.
uint find_max_occurrences(string&);
uint find_max_sequence(string&);
// Splits/extracts strings from the first argument; the role of the second string
// (presumably a delimiter -- confirm) is not visible here.
vector<string> find_components(string&, string);
// Returns unsigned values computed from a list of strings and a list of Components
// (presumably the contact positions stored in Motif::pos_contacts -- confirm).
vector<uint> find_contacts(vector<string>&, vector<Component>&);
// ---- Utilities to compare secondary structures ----
// Equality and inequality for Motif and for Component, declared as free functions.
// The fields that take part in each comparison are decided in the definitions,
// which are not in this header.
bool operator==(const Motif& m1, const Motif& m2);
bool operator!=(const Motif& m1, const Motif& m2);
bool operator==(const Component& c1, const Component& c2);
bool operator!=(const Component& c1, const Component& c2);
#endif // MOTIF_H_