Louis BECQUEY

doc

......@@ -2,6 +2,6 @@
CPLEXDir="/opt/ibm/ILOG/CPLEX_Studio128"
IEIGEN="/usr/local/include/eigen3"
INUPACK="/usr/local/include/nupack"
biorseoDir="/nhome/siniac/lbecquey/Software/biorseo"
jar3dexec="/nhome/siniac/lbecquey/Software/jar3dbin/jar3d_2014-12-11.jar"
bypdir="/nhome/siniac/lbecquey/Software/BayesPairing/bayespairing/src"
biorseoDir="/home/persalteas/Software/biorseo"
jar3dexec="/home/persalteas/Software/jar3dbin/jar3d_2014-12-11.jar"
bypdir="/home/persalteas/Software/BayesPairing/bayespairing/src"
......
No preview for this file type
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
<?xml version="1.0"?>
<!DOCTYPE ipe SYSTEM "ipe.dtd">
<ipe version="70206" creator="Ipe 7.2.7">
<info created="D:20181012182334" modified="D:20181012182334"/>
<ipestyle name="basic">
<symbol name="arrow/arc(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/farc(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/ptarc(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/fptarc(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="mark/circle(sx)" transformations="translations">
<path fill="sym-stroke">
0.6 0 0 0.6 0 0 e
0.4 0 0 0.4 0 0 e
</path>
</symbol>
<symbol name="mark/disk(sx)" transformations="translations">
<path fill="sym-stroke">
0.6 0 0 0.6 0 0 e
</path>
</symbol>
<symbol name="mark/fdisk(sfx)" transformations="translations">
<group>
<path fill="sym-fill">
0.5 0 0 0.5 0 0 e
</path>
<path fill="sym-stroke" fillrule="eofill">
0.6 0 0 0.6 0 0 e
0.4 0 0 0.4 0 0 e
</path>
</group>
</symbol>
<symbol name="mark/box(sx)" transformations="translations">
<path fill="sym-stroke" fillrule="eofill">
-0.6 -0.6 m
0.6 -0.6 l
0.6 0.6 l
-0.6 0.6 l
h
-0.4 -0.4 m
0.4 -0.4 l
0.4 0.4 l
-0.4 0.4 l
h
</path>
</symbol>
<symbol name="mark/square(sx)" transformations="translations">
<path fill="sym-stroke">
-0.6 -0.6 m
0.6 -0.6 l
0.6 0.6 l
-0.6 0.6 l
h
</path>
</symbol>
<symbol name="mark/fsquare(sfx)" transformations="translations">
<group>
<path fill="sym-fill">
-0.5 -0.5 m
0.5 -0.5 l
0.5 0.5 l
-0.5 0.5 l
h
</path>
<path fill="sym-stroke" fillrule="eofill">
-0.6 -0.6 m
0.6 -0.6 l
0.6 0.6 l
-0.6 0.6 l
h
-0.4 -0.4 m
0.4 -0.4 l
0.4 0.4 l
-0.4 0.4 l
h
</path>
</group>
</symbol>
<symbol name="mark/cross(sx)" transformations="translations">
<group>
<path fill="sym-stroke">
-0.43 -0.57 m
0.57 0.43 l
0.43 0.57 l
-0.57 -0.43 l
h
</path>
<path fill="sym-stroke">
-0.43 0.57 m
0.57 -0.43 l
0.43 -0.57 l
-0.57 0.43 l
h
</path>
</group>
</symbol>
<symbol name="arrow/fnormal(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/pointed(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/fpointed(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/linear(spx)">
<path stroke="sym-stroke" pen="sym-pen">
-1 0.333 m
0 0 l
-1 -0.333 l
</path>
</symbol>
<symbol name="arrow/fdouble(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
-1 0 m
-2 0.333 l
-2 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/double(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
-1 0 m
-2 0.333 l
-2 -0.333 l
h
</path>
</symbol>
<pen name="heavier" value="0.8"/>
<pen name="fat" value="1.2"/>
<pen name="ultrafat" value="2"/>
<symbolsize name="large" value="5"/>
<symbolsize name="small" value="2"/>
<symbolsize name="tiny" value="1.1"/>
<arrowsize name="large" value="10"/>
<arrowsize name="small" value="5"/>
<arrowsize name="tiny" value="3"/>
<color name="red" value="1 0 0"/>
<color name="green" value="0 1 0"/>
<color name="blue" value="0 0 1"/>
<color name="yellow" value="1 1 0"/>
<color name="orange" value="1 0.647 0"/>
<color name="gold" value="1 0.843 0"/>
<color name="purple" value="0.627 0.125 0.941"/>
<color name="gray" value="0.745"/>
<color name="brown" value="0.647 0.165 0.165"/>
<color name="navy" value="0 0 0.502"/>
<color name="pink" value="1 0.753 0.796"/>
<color name="seagreen" value="0.18 0.545 0.341"/>
<color name="turquoise" value="0.251 0.878 0.816"/>
<color name="violet" value="0.933 0.51 0.933"/>
<color name="darkblue" value="0 0 0.545"/>
<color name="darkcyan" value="0 0.545 0.545"/>
<color name="darkgray" value="0.663"/>
<color name="darkgreen" value="0 0.392 0"/>
<color name="darkmagenta" value="0.545 0 0.545"/>
<color name="darkorange" value="1 0.549 0"/>
<color name="darkred" value="0.545 0 0"/>
<color name="lightblue" value="0.678 0.847 0.902"/>
<color name="lightcyan" value="0.878 1 1"/>
<color name="lightgray" value="0.827"/>
<color name="lightgreen" value="0.565 0.933 0.565"/>
<color name="lightyellow" value="1 1 0.878"/>
<dashstyle name="dashed" value="[4] 0"/>
<dashstyle name="dotted" value="[1 3] 0"/>
<dashstyle name="dash dotted" value="[4 2 1 2] 0"/>
<dashstyle name="dash dot dotted" value="[4 2 1 2 1 2] 0"/>
<textsize name="large" value="\large"/>
<textsize name="Large" value="\Large"/>
<textsize name="LARGE" value="\LARGE"/>
<textsize name="huge" value="\huge"/>
<textsize name="Huge" value="\Huge"/>
<textsize name="small" value="\small"/>
<textsize name="footnote" value="\footnotesize"/>
<textsize name="tiny" value="\tiny"/>
<textstyle name="center" begin="\begin{center}" end="\end{center}"/>
<textstyle name="itemize" begin="\begin{itemize}" end="\end{itemize}"/>
<textstyle name="item" begin="\begin{itemize}\item{}" end="\end{itemize}"/>
<gridsize name="4 pts" value="4"/>
<gridsize name="8 pts (~3 mm)" value="8"/>
<gridsize name="16 pts (~6 mm)" value="16"/>
<gridsize name="32 pts (~12 mm)" value="32"/>
<gridsize name="10 pts (~3.5 mm)" value="10"/>
<gridsize name="20 pts (~7 mm)" value="20"/>
<gridsize name="14 pts (~5 mm)" value="14"/>
<gridsize name="28 pts (~10 mm)" value="28"/>
<gridsize name="56 pts (~20 mm)" value="56"/>
<anglesize name="90 deg" value="90"/>
<anglesize name="60 deg" value="60"/>
<anglesize name="45 deg" value="45"/>
<anglesize name="30 deg" value="30"/>
<anglesize name="22.5 deg" value="22.5"/>
<opacity name="10%" value="0.1"/>
<opacity name="30%" value="0.3"/>
<opacity name="50%" value="0.5"/>
<opacity name="75%" value="0.75"/>
<tiling name="falling" angle="-60" step="4" width="1"/>
<tiling name="rising" angle="30" step="4" width="1"/>
</ipestyle>
<page>
<layer name="alpha"/>
<view layers="alpha" active="alpha"/>
<path layer="alpha" matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
64 704 m
320 704 l
</path>
<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
96 704 m
32.249 0 0 -32.249 128 708 160 704 a
</path>
<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
208 704 m
25.2982 0 0 -25.2982 232 696 256 704 a
</path>
<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
128 704 m
77.746 0 0 -77.746 200 674.667 272 704 a
</path>
<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
64 704 m
320 704 l
</path>
<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
208 704 m
25.2982 0 0 -25.2982 232 696 256 704 a
</path>
<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
128 704 m
77.746 0 0 -77.746 200 674.667 272 704 a
</path>
<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat">
64 704 m
320 704 l
</path>
<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat">
96 704 m
32.249 0 0 -32.249 128 708 160 704 a
</path>
<text matrix="1 0 0 1 0 -16" transformations="translations" pos="64 688" stroke="black" type="label" width="202.242" height="17.213" depth="4.82" valign="baseline" size="Huge">structure $y$ with PK</text>
<path stroke="black" pen="ultrafat" arrow="normal/normal">
256 736 m
304 752 l
</path>
<path stroke="black" pen="ultrafat" arrow="normal/normal">
288 672 m
320 640 l
</path>
<text matrix="1 0 0 1 32 -16" transformations="translations" pos="384 752" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^1$</text>
<text transformations="translations" pos="416 576" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^2$</text>
<text transformations="translations" pos="432 672" stroke="black" type="label" width="17.843" height="13.97" depth="1.57" valign="baseline" size="Huge">+</text>
</page>
</ipe>
This diff is collapsed. Click to expand it.
No preview for this file type
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{stmaryrd} % llbracket, rrbracket
\usepackage{siunitx} % SI units
\usepackage{geometry}
\usepackage{charter} % betterfont
\geometry{top=1.5cm,bottom=1.5cm, left=2cm, right=2cm}
\begin{document}
\appendix
\section{Linear constraints to model RNA Structure in a linear integer program}
The constraints have been rewritten by us, but are inspired by works like IPknot, Biokop, and RNA-MoIP.
\paragraph{Extended notations} ~ Here we repeat the definitions of the variables already used in the article, and we define a few additional ones:\\
Let $n$ be the number of nucleotides in the query RNA sequence $s$.\\
Let $M$ be the set of modules that could be inserted in $s$.\\
Let $x$ be a module of $M$, $\|x\|$ be the number of distinct components of $x$, and $p(x)$ the associated score of insertion given by JAR3D for that motif inserted at a particular position.\\
Let $P_{x,i}$ be the position in $s$ where we can insert the $i$th component of module $x$.\\
As the same module model can be inserted several times in $s$, several different $x$ modules in $M$ may refer to the same theoretical module, but inserted at different positions.\\
Let $k_{x,i}$ be the size in nucleotides of that $i$th component of $x$.\\
Let $y^u_v$ be the \textbf{decision boolean variable} indicating that $s[u]$ and $s[v]$ form a canonical base pairing. According to the standard loop model, we always have $v > u + 3$.\\
Let $C^x_i$ be the \textbf{decision boolean variable} indicating that we do insert the $i$th component of module $x$ at position $P_{x,i}$.
Note that a base pair $y^u_v$ is possible if and only if $v>u+3$, and that we do not need to use two variables $y^u_v$ and $y^v_u$ for the same pair.
Then, we have $\sum_{i=4}^n (n-i)$ decision variables ($\approx \frac{1}{2}n^2$ decision variables) of the form $y^u_v$.
Regarding the $C^x_i$, if on average $\nu$ motifs can be inserted per RNA sequence, each motif having on average $\mu$ components, and each component being insertable on average at $\pi$ different positions in $s$,
then we need to add, on average, $\nu \times \mu \times \pi$ decision variables $C^x_i$.
We therefore expect around $\frac{1}{2}n^2+\nu \mu \pi$ decision variables in total.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Constraint to ensure there is only 0 or 1 canonical pairing per nucleotide} ~
\begin{equation} \label{constraint:1}
\sum_{v<u} y^v_u + \sum_{v>u} y^u_v \leq 1 \qquad\qquad \forall u \in \llbracket 1,n \rrbracket
\end{equation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Constraints to forbid lonely base pairs} ~
% \begin{equation} \label{constraint:2}
% \sum_{v=u}^n y^{u-1}_v - \sum_{v=u+1}^n y^u_v + \sum_{v=u+2}^n y^{u+1}_v \geq 0 \qquad \qquad \forall u \in \llbracket 1,n\rrbracket
% \end{equation}
% \begin{equation} \label{constraint:3}
% \sum_{u=1}^{v-2} y^u_{v-1} - \sum_{u=1}^{v-1} y^u_v + \sum_{u=1}^{v} y^u_{v+1} \geq 0 \qquad \qquad \forall v \in \llbracket 1,n\rrbracket
% \end{equation}
% These conditions ensure that if a base pair exists with $s[i]$,
% one of the adjacent bases is paired too.
% Equation \ref{constraint:2} is useful if $s[u]$ is paired with $s[v>u]$ (a nucleotide later in the sequence),
% and equation \ref{constraint:3} if $s[v]$ is paired with $s[u<v]$ (a nucleotide earlier in the sequence).
\begin{equation} \label{constraint:2}
y^{u-1}_{v+1} - y^u_v + y^{u+1}_{v-1} \geq 0 \qquad \qquad \forall (u,v) \in \{ (u,v) \in \llbracket 1,n\rrbracket^2 \; | \; u + 3 <v \}
\end{equation}
A base pair should be accompanied by at least one of its neighbours, forming a structure stabilized by stacking energies. In theory, this might add up to \( \frac{1}{2}n^2\) constraints, but in practice, this number is very reasonable as
the only decision variables kept are those with probability above a $\theta$ threshold.
Then, this condition sets to zero the ``lonely decision variables'' that have no allowed neighbouring base-pair variable.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Constraint to forbid pairings inside a module component} ~
\begin{equation} \label{constraint:4}
(k_{x,i}-2) \; C^x_i + \sum_{u=P_{x,i}+1}^{P_{x,i}+k_{x,i}-2}\left[ \sum_{v>u} y^u_v + \sum_{v<u} y^v_u \right] \leq (k_{x,i} - 2)
\qquad \qquad \forall x \in M, i \in \llbracket 1,\|x\| \rrbracket
\end{equation}
If $C^x_i$ is set to 1, then the sum has to be zero. Obviously, this constraint prevents the program from correctly detecting pseudoknots of HHH (kissing hairpins) and LL types (kissing higher-order loops), which is a limitation of the approach.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Constraint to forbid components from overlapping} ~
\begin{equation} \label{constraint:5}
\sum_{x \in M} \sum_{i=1}^{\|x\|} C^x_i \times I(P_{x,i}<u<P_{x,i}+k_{x,i}-1) \leq 1 \qquad \qquad \forall u \in \llbracket 1,n \rrbracket
\end{equation}
$I(P_{x,i}<u<P_{x,i}+k_{x,i}-1)$ is an indicator equal to 1 if the condition holds and 0 otherwise. Thus, any nucleotide $u$ can lie strictly inside (i.e., excluding the two end nucleotides of the component) at most one inserted module component.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Constraints to respect the structure of large motifs ($\{ x\in M \; | \; \|x\| \geq 2\}$)} ~
This constraint ensures that either none or all of the components of a motif are inserted.
\begin{equation}\label{constraint:6}
\sum_{i=2}^{\|x\|} C^x_i = (\|x\| - 1) \times C^{x}_{1} \qquad \qquad \forall x \in \{ x\in M \; | \; \|x\| \geq 2\}
\end{equation}
And then, we force base pairs between the end of a component and the beginning of the next one:
\begin{equation}\label{constraint:7}
C^x_1 \leq y^{P_{x,1}}_{P_{x,\|x\|}+k_{x,\|x\|}-1} \qquad \qquad \forall x \in \{ x\in M \; | \; \|x\| \geq 2\}
\end{equation}
\begin{equation}\label{constraint:8}
C^x_j \leq y^{P_{x,j}+k_{x,j}-1}_{P_{x,j+1}} \qquad \qquad \forall x \in \{ x\in M \; | \; \|x\| \geq 2\}, \forall j \in \llbracket 1,\|x\|-1 \rrbracket
\end{equation}
Constraint \ref{constraint:7} binds the first nucleotide of first component to the last one of the last component.
Constraint \ref{constraint:8} binds the last nucleotide of component $j$ to the first of component $j+1$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Facultative constraint to forbid pseudoknots} ~
\begin{equation}\label{constraint:9}
y^u_v + y^k_l \leq 1 \qquad \qquad \forall u,v,k,l \text{ such that } 1\leq u<k<v<l\leq n
\end{equation}
To limit the number of constraints added, we only add this constraint for allowed base pairs ($u + 3 <v$, $k + 3 <l$, $p_{uv} > \theta$, $p_{kl} > \theta$).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{Constraint to forbid a previously found solution} ~
As several solutions may result in the same values of the two objectives, we cannot prevent the algorithm from searching the same region of the objective landscape twice.
We have to explicitly forbid every solution that has already been found.\\
We do so by iteratively adding, for every structure $s^*$ found, the following condition:
\begin{equation}\label{constraint:10}
\sum_{y^u_v \in \{ y^u_v | y^u_v = 1 \text{ in } s^* \}} (1 - y^u_v) + \sum_{y^u_v \in \{ y^u_v | y^u_v = 0 \text{ in } s^* \}} y^u_v +
\sum_{C^x_i \in \{ C^x_i | C^x_i = 1 \text{ in } s^* \}} (1 - C^x_i) + \sum_{C^x_i \in \{ C^x_i |C^x_i = 0 \text{ in } s^* \}} C^x_i \geq 1
\end{equation}
It ensures that at least one of the decision variables differs from $s^*$.
\end{document}
\ No newline at end of file