Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
biorseo
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2019-02-22 12:05:15 +0100
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
8fb2087130cdef22dc31881f2185d1546a63b977
8fb20871
1 parent
4bfcaf37
Classe Motif & début de loader pour RNA3dMotif
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
270 additions
and
77 deletions
cppsrc/Motif.cpp
cppsrc/Motif.h
cppsrc/SecondaryStructure.cpp
cppsrc/SecondaryStructure.h
cppsrc/biominserter.cpp
cppsrc/Motif.cpp
0 → 100644
View file @
8fb2087
#include "Motif.h"
#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>
#include <iostream>
#include <regex>
#include <sstream>
using
namespace
boost
::
filesystem
;
struct
recursive_directory_range
{
typedef
recursive_directory_iterator
iterator
;
recursive_directory_range
(
path
p
)
:
p_
(
p
)
{}
iterator
begin
()
{
return
recursive_directory_iterator
(
p_
);
}
iterator
end
()
{
return
recursive_directory_iterator
();
}
path
p_
;
};
Motif
::
Motif
(
void
)
{}
void
Motif
::
build_from_desc
(
const
string
&
descfile
)
{
std
::
ifstream
motif
;
string
line
;
string
seq
;
vector
<
string
>
component_sequences
;
vector
<
string
>
bases
;
int
last
;
PDBID
=
descfile
.
substr
(
0
,
descfile
.
find
(
".desc"
));
is_model_
=
false
;
reversed_
=
false
;
source_
=
RNA3DMOTIF
;
motif
=
std
::
ifstream
(
descfile
);
std
::
getline
(
motif
,
line
);
// ignore "id: number"
std
::
getline
(
motif
,
line
);
// Bases: 866_G 867_G 868_G 869_G 870_U 871_A ...
split
(
bases
,
line
,
boost
::
is_any_of
(
" "
));
// get a vector of 866_G, 867_G, etc...
seq
=
bases
[
1
].
back
();
last
=
std
::
stoi
(
bases
[
1
].
substr
(
0
,
bases
[
1
].
find
(
'_'
)));
for
(
vector
<
string
>::
iterator
b
=
bases
.
begin
()
+
2
;
b
!=
bases
.
end
();
b
++
)
{
char
nt
=
b
->
back
();
int
pos
=
std
::
stoi
(
b
->
substr
(
0
,
b
->
find
(
'_'
)));
if
(
pos
-
last
>
5
)
{
// finish this component and start a new one
component_sequences
.
push_back
(
seq
);
seq
=
nt
;
}
else
if
(
pos
-
last
==
1
)
{
// we are on the same component
seq
+=
nt
;
}
else
if
(
pos
-
last
==
2
)
{
seq
+=
'.'
+
nt
;
}
else
if
(
pos
-
last
==
3
)
{
seq
+=
".."
+
nt
;
}
else
if
(
pos
-
last
==
4
)
{
seq
+=
"..."
+
nt
;
}
else
if
(
pos
-
last
==
5
)
{
seq
+=
"...."
+
nt
;
}
}
// Now component_sequences is a vector of sequences like {AGCGC, CGU..GUUU}
for
(
string
&
comp
:
component_sequences
)
{
}
}
void
Motif
::
load_from_csv
(
string
csv_line
)
{
vector
<
string
>
tokens
;
split
(
tokens
,
csv_line
,
boost
::
is_any_of
(
","
));
atlas_id
=
tokens
[
0
];
score_
=
stoi
(
tokens
[
2
]);
comp
.
push_back
(
Component
(
std
::
make_pair
<
int
,
int
>
(
stoi
(
tokens
[
3
]),
stoi
(
tokens
[
4
]))));
if
(
tokens
[
5
]
!=
"-"
)
comp
.
push_back
(
Component
(
std
::
make_pair
<
int
,
int
>
(
stoi
(
tokens
[
5
]),
stoi
(
tokens
[
6
]))));
reversed_
=
(
tokens
[
1
]
==
"True"
);
is_model_
=
true
;
PDBID
=
""
;
source_
=
RNAMOTIFATLAS
;
}
string
Motif
::
pos_string
(
void
)
const
{
std
::
stringstream
s
;
s
<<
atlas_id
<<
" ( "
;
for
(
auto
c
:
comp
)
s
<<
c
.
pos
.
first
<<
'-'
<<
c
.
pos
.
second
<<
' '
;
s
<<
')'
;
return
s
.
str
();
}
string
Motif
::
get_identifier
(
void
)
const
{
switch
(
source_
)
{
case
RNAMOTIFATLAS
:
return
atlas_id
;
break
;
default
:
return
PDBID
;
}
}
vector
<
Motif
>
load_desc_folder
(
const
string
&
path
,
const
string
&
rna
)
{
vector
<
Motif
>
posInsertionSites
;
if
(
!
exists
(
path
))
{
std
::
cerr
<<
"Hmh, i can't find that folder: "
<<
path
<<
std
::
endl
;
return
posInsertionSites
;
}
for
(
auto
it
:
recursive_directory_range
(
path
))
{
if
(
is_desc_insertible
(
it
.
path
().
string
(),
rna
))
{
posInsertionSites
.
push_back
(
Motif
());
posInsertionSites
.
back
().
build_from_desc
(
it
.
path
().
string
());
}
}
return
posInsertionSites
;
}
vector
<
Motif
>
load_jar3d_output
(
const
string
&
path
)
{
vector
<
Motif
>
posInsertionSites
;
std
::
ifstream
motifs
;
string
line
;
motifs
=
std
::
ifstream
(
path
);
std
::
getline
(
motifs
,
line
);
// skip header
while
(
std
::
getline
(
motifs
,
line
))
{
posInsertionSites
.
push_back
(
Motif
());
posInsertionSites
.
back
().
load_from_csv
(
line
);
}
return
posInsertionSites
;
}
bool
is_desc_insertible
(
const
string
&
descfile
,
const
string
&
rna
,
bool
verbose
)
{
std
::
ifstream
motif
;
string
line
;
string
seq
;
vector
<
string
>
bases
;
int
last
;
motif
=
std
::
ifstream
(
descfile
);
std
::
getline
(
motif
,
line
);
// ignore "id: number"
std
::
getline
(
motif
,
line
);
// Bases: 866_G 867_G 868_G 869_G 870_U 871_A ...
split
(
bases
,
line
,
boost
::
is_any_of
(
" "
));
// get a vector of 866_G, 867_G, etc...
seq
=
bases
[
1
].
back
();
last
=
std
::
stoi
(
bases
[
1
].
substr
(
0
,
bases
[
1
].
find
(
'_'
)));
for
(
vector
<
string
>::
iterator
b
=
bases
.
begin
()
+
2
;
b
!=
bases
.
end
();
b
++
)
{
char
nt
=
b
->
back
();
int
pos
=
std
::
stoi
(
b
->
substr
(
0
,
b
->
find
(
'_'
)));
if
(
pos
-
last
>
5
)
{
// finish this component and start a new one
seq
+=
".{5,}"
+
nt
;
}
else
if
(
pos
-
last
==
1
)
{
// we are on the same component
seq
+=
nt
;
}
else
if
(
pos
-
last
==
2
)
{
seq
+=
"."
+
nt
;
}
else
if
(
pos
-
last
==
3
)
{
seq
+=
".."
+
nt
;
}
else
if
(
pos
-
last
==
4
)
{
seq
+=
"..."
+
nt
;
}
else
if
(
pos
-
last
==
5
)
{
seq
+=
"...."
+
nt
;
}
last
=
pos
;
}
std
::
smatch
m
;
std
::
regex
e
(
seq
);
if
(
std
::
regex_search
(
rna
,
m
,
e
))
{
if
(
verbose
)
std
::
cout
<<
"Motif "
<<
descfile
.
substr
(
0
,
descfile
.
find
(
".desc"
))
<<
" "
<<
seq
<<
" can be inserted."
<<
std
::
endl
;
return
true
;
}
else
{
if
(
verbose
)
std
::
cout
<<
"Ignoring motif "
<<
descfile
.
substr
(
0
,
descfile
.
find
(
".desc"
))
<<
" "
<<
seq
<<
std
::
endl
;
return
false
;
}
}
bool
operator
==
(
const
Component
&
c1
,
const
Component
&
c2
)
{
if
(
c1
.
pos
.
first
!=
c2
.
pos
.
first
)
return
false
;
if
(
c1
.
pos
.
second
!=
c2
.
pos
.
second
)
return
false
;
return
true
;
}
bool
operator
!=
(
const
Component
&
c1
,
const
Component
&
c2
)
{
return
not
(
c1
==
c2
);
}
bool
operator
==
(
const
Motif
&
m1
,
const
Motif
&
m2
)
{
if
(
m1
.
get_identifier
()
!=
m2
.
get_identifier
())
return
false
;
if
(
m1
.
score_
!=
m2
.
score_
)
return
false
;
if
(
m1
.
reversed_
!=
m2
.
reversed_
)
return
false
;
for
(
uint
i
=
0
;
i
<
m1
.
comp
.
size
();
i
++
)
if
(
m1
.
comp
[
i
]
!=
m2
.
comp
[
i
])
return
false
;
return
true
;
}
bool
operator
!=
(
const
Motif
&
m1
,
const
Motif
&
m2
)
{
return
not
(
m1
==
m2
);
}
cppsrc/Motif.h
0 → 100644
View file @
8fb2087
#ifndef MOTIF_H_
#define MOTIF_H_
#include <string>
#include <vector>
using
std
::
pair
;
using
std
::
string
;
using
std
::
vector
;
typedef
struct
Comp_
{
pair
<
uint
,
uint
>
pos
;
size_t
k
;
string
seq_
;
Comp_
(
pair
<
int
,
int
>
p
)
:
pos
(
p
)
{
k
=
1
+
pos
.
second
-
pos
.
first
;
}
}
Component
;
class
Motif
{
public
:
Motif
();
void
load_from_csv
(
string
csv_line
);
void
build_from_desc
(
const
string
&
descfile
);
string
pos_string
(
void
)
const
;
string
get_origin
(
void
)
const
;
string
get_identifier
(
void
)
const
;
vector
<
Component
>
comp
;
double
score_
;
bool
reversed_
;
private
:
string
atlas_id
;
// if source = RNAMOTIFATLAS
string
PDBID
;
// if source = RNA3DMOTIF
bool
is_model_
;
// Wether the motif is a model or an extracted module from a 3D structure
enum
{
RNA3DMOTIF
=
1
,
RNAMOTIFATLAS
=
2
,
CARNAVAL
=
3
}
source_
;
};
bool
is_desc_insertible
(
const
string
&
descfile
,
const
string
&
rna
);
vector
<
Motif
>
load_desc_folder
(
const
string
&
path
);
vector
<
Motif
>
load_jar3d_output
(
const
string
&
path
);
// utilities to compare secondary structures:
bool
operator
==
(
const
Motif
&
m1
,
const
Motif
&
m2
);
bool
operator
!=
(
const
Motif
&
m1
,
const
Motif
&
m2
);
bool
operator
==
(
const
Component
&
c1
,
const
Component
&
c2
);
bool
operator
!=
(
const
Component
&
c1
,
const
Component
&
c2
);
#endif // MOTIF_H_
\ No newline at end of file
cppsrc/SecondaryStructure.cpp
View file @
8fb2087
...
...
@@ -104,7 +104,7 @@ string SecondaryStructure::to_string(void) const
{
string
s
;
s
+=
to_DBN
();
for
(
const
Motif
&
m
:
motif_info_
)
s
+=
" + "
+
m
.
atlas_id
;
for
(
const
Motif
&
m
:
motif_info_
)
s
+=
" + "
+
m
.
get_identifier
()
;
s
+=
"
\t
"
+
boost
::
str
(
boost
::
format
(
"%.7f"
)
%
objective_scores_
[
0
])
+
"
\t
"
+
boost
::
str
(
boost
::
format
(
"%.7f"
)
%
objective_scores_
[
1
]);
return
s
;
...
...
@@ -164,7 +164,7 @@ bool basepair_sorter(pair<uint, uint>& i, pair<uint, uint>& j)
bool
motif_sorter
(
Motif
&
m1
,
Motif
&
m2
)
{
if
(
m1
.
atlas_id
.
compare
(
m2
.
atlas_id
)
<
0
)
return
true
;
if
(
m1
.
get_identifier
().
compare
(
m2
.
get_identifier
()
)
<
0
)
return
true
;
return
false
;
}
...
...
@@ -282,30 +282,6 @@ bool operator<=(const SecondaryStructure& s1, const SecondaryStructure& s2)
return
false
;
}
bool
operator
==
(
const
Component
&
c1
,
const
Component
&
c2
)
{
if
(
c1
.
pos
.
first
!=
c2
.
pos
.
first
)
return
false
;
if
(
c1
.
pos
.
second
!=
c2
.
pos
.
second
)
return
false
;
return
true
;
}
bool
operator
!=
(
const
Component
&
c1
,
const
Component
&
c2
)
{
return
not
(
c1
==
c2
);
}
bool
operator
==
(
const
Motif
&
m1
,
const
Motif
&
m2
)
{
if
(
m1
.
atlas_id
!=
m2
.
atlas_id
)
return
false
;
if
(
m1
.
score
!=
m2
.
score
)
return
false
;
if
(
m1
.
reversed
!=
m2
.
reversed
)
return
false
;
for
(
uint
i
=
0
;
i
<
m1
.
comp
.
size
();
i
++
)
if
(
m1
.
comp
[
i
]
!=
m2
.
comp
[
i
])
return
false
;
return
true
;
}
bool
operator
!=
(
const
Motif
&
m1
,
const
Motif
&
m2
)
{
return
not
(
m1
==
m2
);
}
bool
operator
==
(
const
SecondaryStructure
&
s1
,
const
SecondaryStructure
&
s2
)
{
...
...
cppsrc/SecondaryStructure.h
View file @
8fb2087
#ifndef __INC_IP_SOL__
#define __INC_IP_SOL__
#define IL_STD
#ifndef SECONDARY_STRUCTURE_
#define SECONDARY_STRUCTURE_
#include "Motif.h"
#include "rna.h"
#include <iostream>
#include <string>
...
...
@@ -12,28 +11,6 @@ using std::pair;
using
std
::
string
;
using
std
::
vector
;
typedef
struct
Comp_
{
pair
<
uint
,
uint
>
pos
;
size_t
k
;
Comp_
(
pair
<
int
,
int
>
p
)
:
pos
(
p
)
{
k
=
1
+
pos
.
second
-
pos
.
first
;
}
}
Component
;
typedef
struct
{
string
atlas_id
;
vector
<
Component
>
comp
;
bool
reversed
;
int
score
;
string
pos_string
(
void
)
const
{
std
::
stringstream
s
;
s
<<
atlas_id
<<
" ( "
;
for
(
auto
c
:
comp
)
{
s
<<
c
.
pos
.
first
<<
'-'
<<
c
.
pos
.
second
<<
' '
;
}
s
<<
')'
;
return
s
.
str
();
}
}
Motif
;
class
SecondaryStructure
{
...
...
@@ -72,11 +49,7 @@ bool operator<=(const SecondaryStructure& s1, const SecondaryStructure& s2);
// return wether SecondaryStructures are identical or not
bool
operator
==
(
const
SecondaryStructure
&
s1
,
const
SecondaryStructure
&
s2
);
bool
operator
!=
(
const
SecondaryStructure
&
s1
,
const
SecondaryStructure
&
s2
);
// utilities to compare secondary structures:
bool
operator
==
(
const
Motif
&
m1
,
const
Motif
&
m2
);
bool
operator
!=
(
const
Motif
&
m1
,
const
Motif
&
m2
);
bool
operator
==
(
const
Component
&
c1
,
const
Component
&
c2
);
bool
operator
!=
(
const
Component
&
c1
,
const
Component
&
c2
);
bool
motif_sorter
(
Motif
&
m1
,
Motif
&
m2
);
bool
basepair_sorter
(
pair
<
uint
,
uint
>&
i
,
pair
<
uint
,
uint
>&
j
);
...
...
@@ -85,4 +58,5 @@ inline void SecondaryStructure::set_objective_score(int i, double s) { objecti
inline
uint
SecondaryStructure
::
get_n_motifs
(
void
)
const
{
return
motif_info_
.
size
();
}
inline
uint
SecondaryStructure
::
get_n_bp
(
void
)
const
{
return
nBP_
;
}
#endif
#endif // SECONDARY_STRUCTURE_
\ No newline at end of file
...
...
cppsrc/biominserter.cpp
View file @
8fb2087
...
...
@@ -3,7 +3,6 @@
***/
#include <algorithm>
#include <boost/algorithm/string.hpp>
#include <cstdlib>
#include <iostream>
#include <iterator>
...
...
@@ -12,6 +11,7 @@
#include <vector>
#include "MOIP.h"
#include "Motif.h"
#include "fa.h"
using
namespace
std
;
...
...
@@ -49,19 +49,6 @@ string remove_ext(const char* mystr, char dot, char sep)
return
string
(
retstr
);
}
Motif
parse_csv_line
(
string
line
)
{
vector
<
string
>
tokens
;
boost
::
split
(
tokens
,
line
,
boost
::
is_any_of
(
","
));
Motif
m
;
m
.
atlas_id
=
tokens
[
0
];
m
.
score
=
stoi
(
tokens
[
2
]);
m
.
comp
.
push_back
(
Component
(
make_pair
<
int
,
int
>
(
stoi
(
tokens
[
3
]),
stoi
(
tokens
[
4
]))));
if
(
tokens
[
5
]
!=
"-"
)
m
.
comp
.
push_back
(
Component
(
make_pair
<
int
,
int
>
(
stoi
(
tokens
[
5
]),
stoi
(
tokens
[
6
]))));
m
.
reversed
=
(
tokens
[
1
]
==
"True"
);
return
m
;
}
int
main
(
int
argc
,
char
*
argv
[])
{
/* ARGUMENT CHECKING */
...
...
@@ -81,8 +68,6 @@ int main(int argc, char* argv[])
string
basename
=
remove_ext
(
inputName
,
'.'
,
'/'
);
float
theta_p_threshold
=
atof
(
argv
[
3
]);
list
<
Fasta
>
f
;
string
line
;
ifstream
motifs
;
vector
<
Motif
>
posInsertionSites
;
ofstream
outfile
;
SecondaryStructure
bestSSO1
,
bestSSO2
;
...
...
@@ -108,9 +93,7 @@ int main(int argc, char* argv[])
cerr
<<
csvname
<<
" not found"
<<
endl
;
return
EXIT_FAILURE
;
}
motifs
=
ifstream
(
csvname
);
getline
(
motifs
,
line
);
// skip header
while
(
getline
(
motifs
,
line
))
posInsertionSites
.
push_back
(
parse_csv_line
(
line
));
posInsertionSites
=
load_desc_folder
(
csvname
);
if
(
verbose
)
cout
<<
"
\t
>"
<<
csvname
<<
" successfuly loaded ("
<<
posInsertionSites
.
size
()
<<
" insertion sites)"
<<
endl
;
...
...
Please
register
or
login
to post a comment