Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Louis BECQUEY
/
biorseo
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Network
Create a new issue
Builds
Commits
Authored by
Louis BECQUEY
2019-02-22 18:08:25 +0100
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
d7ca4934b76ab13fac1fb4e5aa0d0e6ccbc7f2c7
d7ca4934
1 parent
8fb20871
Support for the old Rna3dMotif .desc format as input
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
82 additions
and
45 deletions
Makefile
cppsrc/Motif.cpp
cppsrc/Motif.h
cppsrc/biominserter.cpp
Makefile
View file @
d7ca493
...
...
@@ -14,7 +14,7 @@ CXXFLAGS = --std=c++17 -Wall -Wpedantic -Wextra -Wno-ignored-attributes -Wno-unu
LINKER
=
clang++
# linking flags here
LDFLAGS
=
-L
$(LCONCERT)
-L
$(LCPLEX)
-lconcert -lilocplex -lcplex -lpthread -ldl -lnupackpfunc -lnupackutils
LDFLAGS
=
-L
$(LCONCERT)
-L
$(LCPLEX)
-l
boost_system -lboost_filesystem -l
concert -lilocplex -lcplex -lpthread -ldl -lnupackpfunc -lnupackutils
# change these to proper directories where each file should be
SRCDIR
=
cppsrc
...
...
cppsrc/Motif.cpp
View file @
d7ca493
...
...
@@ -22,7 +22,15 @@ struct recursive_directory_range {
// Default constructor: creates a motif without setting any of its fields.
Motif::Motif() {}
void
Motif
::
build_from_desc
(
const
string
&
descfile
)
// Builds a motif tagged as coming from RNA3DMOTIF out of a list of components
// and the identifier to store in PDBID. The motif is flagged as neither a
// model nor reversed.
Motif::Motif(const vector<Component>& v, string PDB)
    : comp(v), reversed_(false), PDBID(PDB), is_model_(false), source_(RNA3DMOTIF)
{
}
vector
<
Motif
>
Motif
::
build_from_desc
(
const
string
&
descfile
,
string
rna
)
{
std
::
ifstream
motif
;
string
line
;
...
...
@@ -30,11 +38,7 @@ void Motif::build_from_desc(const string& descfile)
vector
<
string
>
component_sequences
;
vector
<
string
>
bases
;
int
last
;
PDBID
=
descfile
.
substr
(
0
,
descfile
.
find
(
".desc"
));
is_model_
=
false
;
reversed_
=
false
;
source_
=
RNA3DMOTIF
;
vector
<
Motif
>
results
;
motif
=
std
::
ifstream
(
descfile
);
std
::
getline
(
motif
,
line
);
// ignore "id: number"
...
...
@@ -49,25 +53,30 @@ void Motif::build_from_desc(const string& descfile)
if
(
pos
-
last
>
5
)
{
// finish this component and start a new one
component_sequences
.
push_back
(
seq
);
seq
=
nt
;
}
else
if
(
pos
-
last
==
1
)
{
// we are on the same component
seq
+=
nt
;
seq
=
""
;
}
else
if
(
pos
-
last
==
2
)
{
seq
+=
'.'
+
nt
;
seq
+=
'.'
;
}
else
if
(
pos
-
last
==
3
)
{
seq
+=
".."
+
nt
;
seq
+=
".."
;
}
else
if
(
pos
-
last
==
4
)
{
seq
+=
"..."
+
nt
;
seq
+=
"..."
;
}
else
if
(
pos
-
last
==
5
)
{
seq
+=
"...."
+
nt
;
seq
+=
"...."
;
}
seq
+=
nt
;
}
// Now component_sequences is a vector of sequences like {AGCGC, CGU..GUUU}
for
(
string
&
comp
:
component_sequences
)
{
// We need to search for the different positions where to insert the first component
vector
<
vector
<
Component
>>
vresults
=
find_next_ones_in
(
rna
,
component_sequences
);
// Now create proper motifs
for
(
vector
<
Component
>&
v
:
vresults
)
{
results
.
push_back
(
Motif
(
v
,
descfile
.
substr
(
0
,
descfile
.
find
(
".desc"
))));
}
return
results
;
}
void
Motif
::
load_from_csv
(
string
csv_line
)
{
vector
<
string
>
tokens
;
...
...
@@ -82,7 +91,6 @@ void Motif::load_from_csv(string csv_line)
source_
=
RNAMOTIFATLAS
;
}
string
Motif
::
pos_string
(
void
)
const
{
std
::
stringstream
s
;
...
...
@@ -100,12 +108,35 @@ string Motif::get_identifier(void) const
}
}
/**
 * Enumerates the ways to place the patterns of `vc`, in order, inside `rna`.
 *
 * Every string of `vc` is compiled as a regular expression. For each
 * occurrence of vc[0] in `rna`, the remaining patterns are searched
 * recursively in the part of `rna` that follows this occurrence, and every
 * combination found there is prefixed with the current occurrence.
 *
 * @param rna  sequence to search in.
 * @param vc   patterns to place, in the order they must appear in `rna`.
 * @return     one vector of components per combination found. Component
 *             positions are 0-based, inclusive, and expressed in the
 *             coordinates of `rna` (not of the substrings used internally).
 *             Empty if `vc` is empty or if some pattern cannot be placed.
 *
 * NOTE(review): occurrences of a given pattern are enumerated with
 * std::sregex_iterator, i.e. they do not overlap each other.
 */
vector<vector<Component>> Motif::find_next_ones_in(string rna, vector<string> vc)
{
    vector<vector<Component>> results;

    // Nothing to place: there is no combination to report.
    if (vc.empty()) return results;

    const std::regex first_pattern(vc[0]);

    // Every pattern after the first one, including the last.
    const vector<string> next_seqs(vc.begin() + 1, vc.end());

    const std::sregex_iterator no_more_matches;
    for (std::sregex_iterator m(rna.cbegin(), rna.cend(), first_pattern); m != no_more_matches; ++m)    // For each match
    {
        // An empty match has no last position; skip it to avoid unsigned underflow below.
        if (m->length() == 0) continue;

        pair<uint, uint> pos;
        pos.first  = m->position();
        pos.second = m->length() + pos.first - 1;

        // Last pattern of the list: the match alone is a complete combination.
        if (next_seqs.empty()) {
            results.push_back(vector<Component>(1, Component(pos)));
            continue;
        }

        // The remaining patterns are searched after the current match only.
        const uint                      offset    = pos.second + 1;
        const vector<vector<Component>> next_ones = find_next_ones_in(rna.substr(offset), next_seqs);

        for (const vector<Component>& v : next_ones)    // For each of the following combinations
        {
            // Combine the current match with the following combination
            vector<Component> r;
            r.reserve(v.size() + 1);
            r.push_back(Component(pos));
            for (Component next : v) {
                // Positions returned by the recursive call are relative to the
                // substring: shift them back to the coordinates of `rna`.
                next.pos.first += offset;
                next.pos.second += offset;
                r.push_back(next);
            }
            results.push_back(r);
        }
    }
    return results;
}
vector
<
Motif
>
load_desc_folder
(
const
string
&
path
,
const
string
&
rna
)
vector
<
Motif
>
load_desc_folder
(
const
string
&
path
,
const
string
&
rna
,
bool
verbose
)
{
vector
<
Motif
>
posInsertionSites
;
...
...
@@ -115,9 +146,9 @@ vector<Motif> load_desc_folder(const string& path, const string& rna)
}
for
(
auto
it
:
recursive_directory_range
(
path
))
{
if
(
is_desc_insertible
(
it
.
path
().
string
(),
rna
))
{
posInsertionSites
.
push_back
(
Motif
()
);
posInsertionSites
.
back
().
build_from_desc
(
it
.
path
().
string
()
);
if
(
is_desc_insertible
(
it
.
path
().
string
(),
rna
,
verbose
))
{
vector
<
Motif
>
m
=
Motif
::
build_from_desc
(
it
.
path
().
string
(),
rna
);
for
(
Motif
&
mot
:
m
)
posInsertionSites
.
push_back
(
mot
);
}
}
return
posInsertionSites
;
...
...
@@ -158,18 +189,17 @@ bool is_desc_insertible(const string& descfile, const string& rna, bool verbose)
int
pos
=
std
::
stoi
(
b
->
substr
(
0
,
b
->
find
(
'_'
)));
if
(
pos
-
last
>
5
)
{
// finish this component and start a new one
seq
+=
".{5,}"
+
nt
;
}
else
if
(
pos
-
last
==
1
)
{
// we are on the same component
seq
+=
nt
;
seq
+=
".{5,}"
;
}
else
if
(
pos
-
last
==
2
)
{
seq
+=
"."
+
nt
;
seq
+=
"."
;
}
else
if
(
pos
-
last
==
3
)
{
seq
+=
".."
+
nt
;
seq
+=
".."
;
}
else
if
(
pos
-
last
==
4
)
{
seq
+=
"..."
+
nt
;
seq
+=
"..."
;
}
else
if
(
pos
-
last
==
5
)
{
seq
+=
"...."
+
nt
;
seq
+=
"...."
;
}
seq
+=
nt
;
// pos - last == 1 in particular
last
=
pos
;
}
std
::
smatch
m
;
...
...
cppsrc/Motif.h
View file @
d7ca493
...
...
@@ -13,32 +13,38 @@ typedef struct Comp_ {
size_t
k
;
string
seq_
;
// Builds a component from its (first, last) position pair.
// k is the number of positions covered, both bounds included.
Comp_(pair<int, int> p) : pos(p)
{
    k = pos.second - pos.first + 1;
}
// Builds a component of `length` positions starting at `start`.
// The stored end position is inclusive.
Comp_(uint start, uint length) : k(length)
{
    const uint last = start + length - 1;
    pos.first       = start;
    pos.second      = last;
}
}
Component
;
class
Motif
{
public
:
Motif
();
void
load_from_csv
(
string
csv_line
);
void
build_from_desc
(
const
string
&
descfile
);
string
pos_string
(
void
)
const
;
string
get_origin
(
void
)
const
;
string
get_identifier
(
void
)
const
;
vector
<
Component
>
comp
;
double
score_
;
bool
reversed_
;
Motif
(
const
vector
<
Component
>&
v
,
string
PDB
);
void
load_from_csv
(
string
csv_line
);
static
vector
<
Motif
>
build_from_desc
(
const
string
&
descfile
,
string
rna
);
string
pos_string
(
void
)
const
;
string
get_origin
(
void
)
const
;
string
get_identifier
(
void
)
const
;
vector
<
Component
>
comp
;
double
score_
;
bool
reversed_
;
private
:
static
vector
<
vector
<
Component
>>
find_next_ones_in
(
string
rna
,
vector
<
string
>
vc
);
string
atlas_id
;
// if source = RNAMOTIFATLAS
string
PDBID
;
// if source = RNA3DMOTIF
bool
is_model_
;
// Whether the motif is a model or an extracted module from a 3D structure
enum
{
RNA3DMOTIF
=
1
,
RNAMOTIFATLAS
=
2
,
CARNAVAL
=
3
}
source_
;
};
bool
is_desc_insertible
(
const
string
&
descfile
,
const
string
&
rna
);
vector
<
Motif
>
load_desc_folder
(
const
string
&
path
);
bool
is_desc_insertible
(
const
string
&
descfile
,
const
string
&
rna
,
bool
verbose
);
vector
<
Motif
>
load_desc_folder
(
const
string
&
path
,
const
string
&
rna
,
bool
verbose
);
vector
<
Motif
>
load_jar3d_output
(
const
string
&
path
);
// utilities to compare secondary structures:
...
...
cppsrc/biominserter.cpp
View file @
d7ca493
...
...
@@ -93,9 +93,10 @@ int main(int argc, char* argv[])
cerr
<<
csvname
<<
" not found"
<<
endl
;
return
EXIT_FAILURE
;
}
posInsertionSites
=
load_desc_folder
(
csvname
);
posInsertionSites
=
load_desc_folder
(
csvname
,
fa
->
seq
(),
verbose
);
if
(
verbose
)
cout
<<
"
\t
>"
<<
csvname
<<
" successfuly loaded ("
<<
posInsertionSites
.
size
()
<<
" insertion sites)"
<<
endl
;
exit
(
0
);
/* FIND PARETO SET */
...
...
Please
register
or
login
to post a comment