Motif.h
3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#ifndef MOTIF_H_
#define MOTIF_H_
#include <boost/filesystem.hpp>
#include <mutex>
#include <string>
#include <vector>
#include <filesystem>
#include "rna.h"
#include "json.hpp"
using boost::filesystem::path;
using std::pair;
using std::string;
using std::vector;
using std::mutex;
// Tag identifying where a motif definition came from; Motif stores one in its private source_ field.
// NOTE(review): the enumerators look like they correspond to the loaders declared in this header
// (load_txt_folder / load_desc_folder / load_csv / load_json_folder) — confirm in the implementation file.
enum source_type {
    RNA3DMOTIF = 1,
    CSV        = 2,
    CARNAVAL   = 3,
    JSON       = 4
};
// Iterator type over a default-parameterised nlohmann::basic_json<> value; it is the argument type of Motif::is_valid_JSON.
// NOTE(review): this reaches into nlohmann's `detail` namespace, which is library-internal; it is presumably
// the same type as the public nlohmann::json::iterator alias — confirm against the bundled json.hpp.
typedef nlohmann::detail::iter_impl<nlohmann::basic_json<> > json_elem;
// One contiguous piece of a motif, described by an inclusive position interval.
//   pos  : (first, last) positions of the interval, both inclusive.
//   k    : number of positions covered, i.e. 1 + pos.second - pos.first.
//   seq_ : left default-constructed (empty) by both constructors; filled in elsewhere, if at all.
struct Comp_ {
    pair<uint, uint> pos;
    size_t           k;
    string           seq_;

    // Builds a component from an inclusive (first, last) interval.
    // The signed pair is converted to unsigned positions; the length is derived from the converted values.
    // Members are initialised in declaration order, so `pos` is already set when `k` is computed.
    Comp_(pair<int, int> p) : pos(p), k(1 + pos.second - pos.first) {}

    // Builds a component from its first position and its length; the last position is start + length - 1.
    Comp_(uint start, uint length) : pos(start, start + length - 1), k(length) {}
};
typedef Comp_ Component;
// A relation between two positions, stored by Motif in its links_ vector.
//   nts        : the two linked positions (presumably nucleotide indices — confirm with the code that fills links_).
//   long_range : flag attached to the link; NOTE(review): its exact criterion is not visible in this header.
struct Link {
    pair<uint, uint> nts;
    bool             long_range;
};
// A motif made of one or more Components, together with the links, contacts and scores attached to it.
// All member functions are only declared here; their definitions live outside this header.
class Motif
{
    public:
    // Constructors.
    // NOTE(review): what each overload parses or loads is inferred from parameter names only
    // (a CSV line, a name with an optional structure string, a RIN file with an id and an orientation flag)
    // — confirm against the implementation.
    Motif();
    Motif(string csv_line);
    Motif(const vector<Component>& v, string name);
    Motif(const vector<Component>& v, string name, string& struc);
    Motif(const vector<Component>& v, path rinfile, uint id, bool reversed);
    // Motif(string path, int id); //full path to biorseo/data/modules/RIN/Subfiles/

    // Validity checks for the supported inputs. Each returns a char;
    // NOTE(review): the meaning of the returned character is defined in the implementation — confirm before relying on it.
    static char is_valid_RIN(const string& rinfile);
    static char is_valid_DESC(const string& descfile);
    static char is_valid_JSON(const json_elem& i);

    // Read-only string accessors (all const member functions).
    string pos_string() const;
    string sec_struct() const;
    string get_origin() const;
    string get_identifier() const;

    // Public data. NOTE(review): the descriptions are kept minimal because the semantics are not visible in this header.
    vector<Component> comp;           // the components making up this motif
    vector<Link>      links_;         // links between positions
    vector<uint>      pos_contacts;   // presumably positions involved in contacts — confirm
    size_t            contact_;
    double            tx_occurrences_;
    double            score_;
    bool              reversed_;

    // delay is the minimal shift between the end of a component and the beginning of the next.
    // For regular loop motifs, it should be at least 5 (because hairpins cannot be of size smaller than 5).
    // For the general case, it could be zero, but solutions will look dirty...
    // Higher values reduce combinatorial explosion of potential insertion sites.
    static uint delay;

    private:
    string      id_;
    source_type source_;   // where this motif was loaded from (see source_type)
};
// Free functions working on motifs. They are only declared here; the definitions live outside this header.
// NOTE(review): the behaviour notes below are inferred from names and signatures only — confirm against the implementation.

// Presumably tells whether the motif described by `descfile` can be inserted into the sequence `rna`.
bool is_desc_insertible(const string& descfile, const string& rna);
// Presumably validate a motif's secondary-structure string and its sequence string, respectively.
// Both take their argument by value.
bool check_motif_ss(string);
bool check_motif_sequence(string);
// Loaders returning a vector of Motif built from `path`.
// The three folder loaders also take the RNA sequence and a verbosity flag; load_csv takes only the path.
vector<Motif> load_txt_folder(const string& path, const string& rna, bool verbose);
vector<Motif> load_desc_folder(const string& path, const string& rna, bool verbose);
vector<Motif> load_csv(const string& path);
vector<Motif> load_json_folder(const string& path, const string& rna, bool verbose);
// Returns groups of Components computed from `rna`, a starting `offset` and the strings in `vc`.
// NOTE(review): presumably enumerates the possible placements of the component sequences in `rna` — confirm.
vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<string> vc);
// utilities to compare secondary structures:
// Equality and inequality for whole motifs and for single components.
bool operator==(const Motif& m1, const Motif& m2);
bool operator!=(const Motif& m1, const Motif& m2);
bool operator==(const Component& c1, const Component& c2);
bool operator!=(const Component& c1, const Component& c2);
#endif // MOTIF_H_