Ludovic PLATON

Add new feature element in Featurer, remove KMER6

......@@ -9,391 +9,438 @@
#include "feature_callable.h"
namespace feature {
// Codons accepted by ORF::in_start(), listed in both RNA (U) and DNA (T) spelling.
const std::vector<std::string> ORF::START{"AUG","ATG","TTG","UUG","CTG","CUG"};
// Codons accepted by ORF::in_end(), listed in both RNA (U) and DNA (T) spelling.
const std::vector<std::string> ORF::END{"UAA","TAA","UAG","TAG","UGA","TGA"};
// Nucleotide letter -> rank (A=0, C=1, T and U=2, G=3); read by posKmer().
std::unordered_map<char,int> AN_ORDER{{'A',0},{'C',1},{'T',2},{'U',2},{'G',3}};
// TYPE: identifier string of each feature class (returned by getID()).
const std::string Feature::TYPE = "Feature";
const std::string SeqLength::TYPE = "SeqL";
const std::string Kmer::TYPE = "KMER";
const std::string ORF::TYPE = "ORF";
const std::string CodonPosition::TYPE = "CP";
// ENTRY_TYPE: presumably the kind of entry each feature is computed from -- TODO confirm.
const std::string Feature::ENTRY_TYPE = "None";
const std::string SeqLength::ENTRY_TYPE = "Sequence";
const std::string Kmer::ENTRY_TYPE = "Sequence";
const std::string ORF::ENTRY_TYPE = "Sequence";
const std::string CodonPosition::ENTRY_TYPE = "Sequence";
// Init func
void Feature::init(const Data_basic *db){
const Feature *tmp = static_cast<const Feature*>(db);
this->name = tmp->name;
}
// Codons accepted by ORF::in_start(), listed in both RNA (U) and DNA (T) spelling.
const std::vector<std::string> ORF::START{"AUG","ATG","TTG","UUG","CTG","CUG"};
// Codons accepted by ORF::in_end(), listed in both RNA (U) and DNA (T) spelling.
const std::vector<std::string> ORF::END{"UAA","TAA","UAG","TAG","UGA","TGA"};
// Nucleotide letter -> rank (A=0, C=1, T and U=2, G=3); read by posKmer().
std::unordered_map<char,int> AN_ORDER{{'A',0},{'C',1},{'T',2},{'U',2},{'G',3}};
// TYPE: identifier string of each feature class (returned by getID()).
const std::string Feature::TYPE = "Feature";
const std::string SeqLength::TYPE = "SeqL";
const std::string Kmer::TYPE = "KMER";
const std::string ORF::TYPE = "ORF";
const std::string CodonBiases::TYPE = "CP";
// ENTRY_TYPE: presumably the kind of entry each feature is computed from -- TODO confirm.
const std::string Feature::ENTRY_TYPE = "None";
const std::string SeqLength::ENTRY_TYPE = "Sequence";
const std::string Kmer::ENTRY_TYPE = "Sequence";
const std::string ORF::ENTRY_TYPE = "Sequence";
const std::string CodonBiases::ENTRY_TYPE = "Sequence";
// Init func
void Feature::init(const Data_basic *db){
const Feature *tmp = static_cast<const Feature*>(db);
this->name = tmp->name;
}
void SeqLength::init(const Data_basic *db){
const SeqLength *tmp = static_cast<const SeqLength*>(db);
this->l = tmp->l;
}
void SeqLength::init(const Data_basic *db){
const SeqLength *tmp = static_cast<const SeqLength*>(db);
this->l = tmp->l;
}
// Returns a pointer to a newly allocated SeqLength built from this instance.
data::Data_basic *SeqLength::clone(){
    data::Data_basic *copy = new SeqLength(this);
    return copy;
}
// Returns a pointer to a newly allocated SeqLength built from this instance.
data::Data_basic *SeqLength::clone(){
    data::Data_basic *copy = new SeqLength(this);
    return copy;
}
void Kmer::init(const Data_basic *db){
const Kmer *tmp = static_cast<const Kmer*>(db);
this->k = tmp->k;
this->kmer = tmp->kmer;
}
void Kmer::init(const Data_basic *db){
const Kmer *tmp = static_cast<const Kmer*>(db);
this->k = tmp->k;
this->kmer = tmp->kmer;
}
// Returns a pointer to a newly allocated Kmer built from this instance.
data::Data_basic *Kmer::clone(){
    data::Data_basic *copy = new Kmer(this);
    return copy;
}
// Returns a pointer to a newly allocated Kmer built from this instance.
data::Data_basic *Kmer::clone(){
    data::Data_basic *copy = new Kmer(this);
    return copy;
}
void ORF::init(const Data_basic *db){
const ORF *tmp = static_cast<const ORF*>(db);
this->length = tmp->length;
this->coverage = tmp->coverage;
}
void ORF::init(const Data_basic *db){
const ORF *tmp = static_cast<const ORF*>(db);
this->length = tmp->length;
this->coverage = tmp->coverage;
this->start_mean = tmp->start_mean;
this->start_std = tmp->start_std;
this->end_mean = tmp->end_mean;
this->end_std = tmp->end_std;
}
// Returns a pointer to a newly allocated ORF built from this instance.
data::Data_basic *ORF::clone(){
    data::Data_basic *copy = new ORF(this);
    return copy;
}
// Returns a pointer to a newly allocated ORF built from this instance.
data::Data_basic *ORF::clone(){
    data::Data_basic *copy = new ORF(this);
    return copy;
}
void CodonPosition::init(const Data_basic *db){
const CodonPosition *tmp = static_cast<const CodonPosition*>(db);
std::copy(std::begin(tmp->position),std::end(tmp->position),std::begin(this->position));
}
void CodonBiases::init(const Data_basic *db){
const CodonBiases *tmp = static_cast<const CodonBiases*>(db);
std::copy(std::begin(tmp->position),std::end(tmp->position),std::begin(this->position));
std::copy(std::begin(tmp->composition),std::end(tmp->composition),std::begin(this->composition));
}
// Returns a pointer to a newly allocated CodonPosition built from this instance.
data::Data_basic *CodonPosition::clone(){
    data::Data_basic *copy = new CodonPosition(this);
    return copy;
}
// Returns a pointer to a newly allocated CodonBiases built from this instance.
data::Data_basic *CodonBiases::clone(){
    data::Data_basic *copy = new CodonBiases(this);
    return copy;
}
// To string function
// To string function
// Serialises the feature as comma-separated "key" : value pairs:
// the name followed by the sequence length under the key "SeqL".
// The ", " separator after the name value matches what Kmer::to_dict() and
// CodonBiases::to_dict() emit; without it the two entries ran together.
std::string SeqLength::to_dict(){
    std::string out("\"name\" :\"");
    out += this->name;
    out += "\", \"SeqL\" : ";
    out += std::to_string(this->l);
    return out;
}
// Serialises the feature as a CSV row: name, then the sequence length.
std::string SeqLength::to_csv(){
    std::string row(this->name);
    row += ",";
    row += std::to_string(this->l);
    return row;
}
// Serialises the feature as comma-separated "key" : value pairs:
// the name followed by the sequence length under the key "SeqL".
// The ", " separator after the name value matches what Kmer::to_dict() and
// CodonBiases::to_dict() emit; without it the two entries ran together.
std::string SeqLength::to_dict(){
    std::string out("\"name\" :\"");
    out += this->name;
    out += "\", \"SeqL\" : ";
    out += std::to_string(this->l);
    return out;
}
// Maps a k-mer string to its index in a table of 4^k entries.
// Each character contributes rank * 4^(k-i-1), where the rank comes from
// AN_ORDER and i is the character's position in `id`.
//   id : k-mer text; characters missing from AN_ORDER contribute 0.
//   k  : k-mer size; characters at positions >= k are ignored.
// Returns the accumulated index.
int posKmer(std::string id,unsigned int k){
    int pos(0);
    // Stop at k: the exponent k-i-1 is unsigned and would wrap for i >= k.
    for(unsigned int i(0); i < id.size() && i < k; i++){
        // find() rather than operator[], which would insert unknown
        // characters into the global table as a side effect.
        const auto rank = AN_ORDER.find(id[i]);
        if(rank == AN_ORDER.end()){
            continue;
        }
        // 4^(k-i-1) as an exact integer shift instead of floating-point pow().
        pos += rank->second << (2*(k-i-1));
    }
    return pos;
}
// Serialises the feature as a CSV row: name, then the sequence length.
std::string SeqLength::to_csv(){
    std::string row(this->name);
    row += ",";
    row += std::to_string(this->l);
    return row;
}
std::string Kmer::to_dict(){
std::string tmp(std::string("\"name\" :\"") + this->name+"\"");
auto it = this->kmer.begin();
while(it != this->kmer.end()){
tmp += ", \""+it->first + "\": " + std::to_string(it->second);
++it;
int posKmer(std::string id,unsigned int k){
int pos(0);
for(unsigned int i(0); i < id.size(); i++){
int tmp = AN_ORDER[id[i]];
if(tmp>0){
pos += tmp*(int)(pow(4.0,k-i-1));
}
return tmp;
}
// Serialises the k-mer profile as a CSV row: the name followed by 4^k values,
// one per possible k-mer, placed at the index posKmer() assigns to that k-mer.
// K-mers absent from the table are written as 0.
std::string Kmer::to_csv(){
    std::vector<float> vect((int)pow(4.0,k),0.0);
    for(const auto &entry : this->kmer){
        vect[posKmer(entry.first,this->k)] = entry.second;
    }
    std::string tmp(this->name);
    for(const float value : vect){
        tmp += ","+ std::to_string(value);
    }
    // The stray `return pos;` that followed referenced an undeclared name and
    // was unreachable; it has been removed.
    return tmp;
}
// Serialises the k-mer profile as comma-separated "key" : value pairs:
// the name first, then one entry per stored k-mer in the table's iteration order.
std::string Kmer::to_dict(){
    std::string out(std::string("\"name\" :\"") + this->name+"\"");
    for(const auto &entry : this->kmer){
        out += ", \""+entry.first + "\": " + std::to_string(entry.second);
    }
    return out;
}
std::string ORF::to_dict(){
return std::string("\"name\" :\"") + this->name + "\"\"Length\": " + std::to_string(this->length) + ", \"ORF\" : " + std::to_string(this->coverage);
std::string Kmer::to_csv(){
std::vector<float> vect((int)pow(4.0,k),0.0);
auto it = this->kmer.begin();
while(it != this->kmer.end()){
vect[posKmer(it->first,this->k)] = it->second;
++it;
}
std::string ORF::to_csv(){
return this->name+","+std::to_string(this->length) + ","+ std::to_string(this->coverage);
std::string tmp(this->name);
for(unsigned int i(0); i < vect.size(); i++){
tmp += ","+ std::to_string(vect[i]);
}
return tmp;
}
// Serialises the ORF statistics as comma-separated "key" : value pairs:
// name, Length, Coverage, Start mean, Start std, End mean, End std.
// The ", " separator after the name value matches what Kmer::to_dict() and
// CodonBiases::to_dict() emit; without it "name" and "Length" ran together.
std::string ORF::to_dict(){
    std::string out("\"name\" :\"");
    out += this->name;
    out += "\", \"Length\": " + std::to_string(this->length);
    out += ", \"Coverage\" : " + std::to_string(this->coverage);
    out += ", \"Start mean\" : " + std::to_string(this->start_mean);
    out += ", \"Start std\" : " + std::to_string(this->start_std);
    out += ", \"End mean\" : " + std::to_string(this->end_mean);
    out += ", \"End std\" : " + std::to_string(this->end_std);
    return out;
}
// Serialises the ORF statistics as a CSV row in the order:
// name, length, coverage, start mean, start std, end mean, end std.
std::string ORF::to_csv(){
    std::string row(this->name);
    row += "," + std::to_string(this->length);
    const float stats[5] = {
        this->coverage, this->start_mean, this->start_std,
        this->end_mean, this->end_std
    };
    for(const float value : stats){
        row += "," + std::to_string(value);
    }
    return row;
}
std::string CodonPosition::to_dict(){
return std::string("\"name\" :\"") + this->name + "\", \"A\" : " + std::to_string(this->position[0]) +
", \"C\" : " + std::to_string(this->position[1]) +
", \"T\" : " + std::to_string(this->position[2]) +
", \"G\" : " + std::to_string(this->position[3]);
// Serialises the feature as comma-separated "key" : value pairs: the name,
// then "Position X" for X in A, C, T, G, then "Composition X" in the same order.
std::string CodonBiases::to_dict(){
    static const char letters[4] = {'A','C','T','G'};
    std::string out(std::string("\"name\" :\"") + this->name + "\"");
    for(int idx = 0; idx < 4; ++idx){
        out += ", \"Position ";
        out += letters[idx];
        out += "\" : " + std::to_string(this->position[idx]);
    }
    for(int idx = 0; idx < 4; ++idx){
        out += ", \"Composition ";
        out += letters[idx];
        out += "\" : " + std::to_string(this->composition[idx]);
    }
    return out;
}
// Serialises the feature as a CSV row: name, the four position scores,
// then the four composition values.
std::string CodonBiases::to_csv(){
    std::string row(this->name);
    for(int idx = 0; idx < 4; ++idx){
        row += "," + std::to_string(this->position[idx]);
    }
    for(int idx = 0; idx < 4; ++idx){
        row += "," + std::to_string(this->composition[idx]);
    }
    return row;
}
// Constructor
// Kmer
Kmer::Kmer(entry::Sequence &s, const unsigned int &k):Feature(s.getName()),k(k){
std::string seq = s.getSeq();
for(unsigned int i(0); i < s.getLength() - this->k - 1; i++){
std::string kmer = seq.substr(i,this->k);
std::transform(kmer.begin(),kmer.end(),kmer.begin(),::toupper);
std::replace(kmer.begin(),kmer.end(),'U','T');
std::unordered_map<std::string,float>::iterator kmer_it = this->kmer.find(kmer);
if(kmer_it != this->kmer.end()){
kmer_it->second ++;
}else{
this->kmer[kmer] = 1;
}
}
std::string CodonPosition::to_csv(){
return this->name+","+
std::to_string(this->position[0]) + ","+
std::to_string(this->position[1]) + ","+
std::to_string(this->position[2]) + ","+
std::to_string(this->position[3]);
// compute frequency
for(std::unordered_map<std::string,float>::iterator it = this->kmer.begin(); it!= this->kmer.end();++it){
it->second /=(s.getLength()-(it->first.length()));
}
}
// Constructor
// Kmer
Kmer::Kmer(entry::Sequence &s, const unsigned int &k):Feature(s.getName()),k(k){
std::string seq = s.getSeq();
for(unsigned int i(0); i < s.getLength() - this->k - 1; i++){
std::string kmer = seq.substr(i,this->k);
std::transform(kmer.begin(),kmer.end(),kmer.begin(),::toupper);
std::replace(kmer.begin(),kmer.end(),'U','T');
std::unordered_map<std::string,float>::iterator kmer_it = this->kmer.find(kmer);
if(kmer_it != this->kmer.end()){
kmer_it->second ++;
}else{
this->kmer[kmer] = 1;
}
// ORF
ORF::ORF(const entry::Sequence &s):Feature(s.getName()),length(0){
std::string seq = s.getSeq();
std::transform(seq.begin(),seq.end(),seq.begin(),toupper);
std::vector<unsigned int> start_codon;
std::vector<unsigned int> end_codon;
std::pair<unsigned int, unsigned int> orf_pair;
//find position codon start and stop
for(unsigned int i(0); i < seq.length(); i++){
std::string tmp = seq.substr(i,3);
if(ORF::in_start(tmp)){
start_codon.push_back(i);
}
// compute frequency
for(std::unordered_map<std::string,float>::iterator it = this->kmer.begin(); it!= this->kmer.end();++it){
it->second /=(s.getLength()-(it->first.length()));
if(ORF::in_end(tmp)){
end_codon.push_back(i);
}
}
// ORF
ORF::ORF(const entry::Sequence &s):Feature(s.getName()),length(0){
std::string seq = s.getSeq();
std::transform(seq.begin(),seq.end(),seq.begin(),toupper);
std::vector<unsigned int> start_codon;
std::vector<unsigned int> end_codon;
std::pair<unsigned int, unsigned int> orf_pair;
//find position codon start and stop
for(unsigned int i(0); i < seq.length(); i++){
std::string tmp = seq.substr(i,3);
if(ORF::in_start(tmp)){
start_codon.push_back(i);
// ORF detection
for(auto const &start_pos: start_codon){
if(!end_codon.empty()){
if(end_codon.back() - start_pos < this->length){
break;
}
if(ORF::in_end(tmp)){
end_codon.push_back(i);
}
}
// ORF detection
for(auto const &start_pos: start_codon){
if(!end_codon.empty()){
if(end_codon.back() - start_pos < this->length){
break;
}
for(auto const &end_pos: end_codon){
if(start_pos < end_pos){
unsigned int length = end_pos - start_pos;
if((length%3 == 0)){
if(length > this->length){
orf_pair = std::pair<unsigned int, unsigned int>(start_pos,end_pos);
this->length = length;
}
break;
for(auto const &end_pos: end_codon){
if(start_pos < end_pos){
unsigned int length = end_pos - start_pos;
if((length%3 == 0)){
if(length > this->length){
orf_pair = std::pair<unsigned int, unsigned int>(start_pos,end_pos);
this->length = length;
}
break;
}
}
}
}
this->coverage = static_cast<float>(this->length) / s.getLength();
}
// Codon position
CodonPosition::CodonPosition(const entry::Sequence &seq):Feature(seq.getName()){
std::string s = seq.getSeq();
std::valarray<int> mat(4*3);
for(unsigned int i(0); i < s.length(); i++){
//std::cout << s.at(i) << " / " << i << std::endl;
switch(s.at(i)){
case 'A':
case 'a':
mat[0+(i%3)] += 1;
break;
case 'C':
case 'c':
mat[3+(i%3)] += 1;
break;
case 'T':
case 't':
case 'U':
case 'u':
mat[6+(i%3)] += 1;
break;
case 'G':
case 'g':
mat[9+(i%3)] += 1;
break;
}
}
for(int i(0); i < 4; i++){
std::valarray<int> tmp(mat[std::slice(i*3,3,1)]);
auto mm = std::minmax_element(begin(tmp),end(tmp));
this->position[i] = static_cast<float>(*mm.first)/
(static_cast<float>(*mm.second) + 1.0);
}
this->coverage = static_cast<float>(this->length) / s.getLength();
this->length = log10(this->length);
this->start_mean = 0;
for(auto const &start_pos: start_codon){
this->start_mean += start_pos;
}
this->start_mean /= start_codon.size();
this->start_std = 0;
for(auto const &start_pos: start_codon){
this->start_std += pow(start_pos - this->start_mean,2.0);
}
this->start_std = sqrt(this->start_mean)/start_codon.size() / s.getLength();
this->start_mean /= s.getLength();
// Dist
float Kmer::dist(Feature *ka){
Kmer* ptr_a = static_cast<Kmer*>(ka);
Kmer a(ptr_a);
std::unordered_map<std::string,float> b_kmer = this->kmer;
float dist = 0;
while(a.kmer.size() > 0){
auto it_a = a.kmer.begin();
auto it_b = b_kmer.find(it_a->first);
float value_b = 0;
if(it_b != b_kmer.end()){
value_b = it_b->second;
b_kmer.erase(it_b);
}
dist += std::abs(it_a->second - value_b);
a.kmer.erase(it_a);
}
for(auto it_b = b_kmer.begin(); it_b!= b_kmer.end();++it_b){
dist += std::abs(it_b->second);
}
return dist;
this->end_mean = 0;
for(auto const &end_pos: end_codon){
this->end_mean += end_pos;
}
this->end_mean /= end_codon.size();
float SeqLength::dist(Feature *a){
SeqLength *tmp = static_cast<SeqLength*>(a);
float res = static_cast<float>(std::abs(this->l - tmp->l));
return res;
this->end_std = 0;
for(auto const &end_pos: end_codon){
this->end_std += pow(end_pos - this->end_mean,2.0);
}
this->end_std = sqrt(this->end_mean)/end_codon.size() / s.getLength();
this->end_mean /= s.getLength();
}
float ORF::dist(Feature *a){
ORF *tmp = static_cast<ORF*>(a);
float res = std::abs(this->coverage - tmp->coverage);
return res;
// Codon position
// Builds the codon-bias feature of a sequence.
//   position[n]    : for nucleotide n (A, C, T/U, G), the smallest of its three
//                    per-codon-position counts divided by (the largest + 1).
//   composition[n] : count of nucleotide n divided by seq.getLength().
// Characters other than A/C/T/U/G (either case) are ignored.
CodonBiases::CodonBiases(const entry::Sequence &seq):Feature(seq.getName()){
    const std::string s = seq.getSeq();
    // 4 nucleotides x 3 codon positions, row-major, value-initialised to 0.
    std::valarray<int> mat(4*3);
    for(unsigned int i(0); i < s.length(); i++){
        int row;
        switch(s[i]){
        case 'A': case 'a':
            row = 0;
            break;
        case 'C': case 'c':
            row = 1;
            break;
        case 'T': case 't': case 'U': case 'u':
            row = 2;
            break;
        case 'G': case 'g':
            row = 3;
            break;
        default:
            continue; // not a nucleotide letter handled here
        }
        mat[row*3+(i%3)] += 1;
        this->composition[row] += 1;
    }
    const auto total = seq.getLength();
    for(int i(0); i < 4; i++){
        std::valarray<int> tmp(mat[std::slice(i*3,3,1)]);
        auto mm = std::minmax_element(std::begin(tmp),std::end(tmp));
        this->position[i] = static_cast<float>(*mm.first)/
                (static_cast<float>(*mm.second) + 1.0);
        // Guard against an empty sequence: dividing by a zero length would
        // turn the composition into NaN.
        if(total > 0){
            this->composition[i] /= total;
        }
    }
}
float CodonPosition::dist(Feature *a){
CodonPosition *tmp = static_cast<CodonPosition*>(a);
float res(0.0);
for(int i(0); i < 4; i++){
res += std::abs(this->position[i] - tmp->position[i]);
// Dist
// L1 distance between two k-mer profiles: the sum over every k-mer present in
// either table of |value in `ka` - value in this|, a missing k-mer counting as 0.
//   ka : feature to compare with; treated as a Kmer.
float Kmer::dist(Feature *ka){
    const Kmer *other = static_cast<Kmer*>(ka);
    float dist = 0;
    // K-mers stored in the other profile, matched against ours.
    for(const auto &entry : other->kmer){
        const auto mine = this->kmer.find(entry.first);
        const float value_b = (mine != this->kmer.end()) ? mine->second : 0;
        dist += std::abs(entry.second - value_b);
    }
    // K-mers that only exist in our profile.
    for(const auto &entry : this->kmer){
        if(other->kmer.find(entry.first) == other->kmer.end()){
            dist += std::abs(entry.second);
        }
    }
    // Looking entries up in place replaces the former copy-and-erase of both
    // tables; the stray `return res;` (undeclared name) inside the loop is gone.
    return dist;
}
// Other
// Distance between two SeqLength features: absolute difference of the lengths.
//   a : feature to compare with; treated as a SeqLength.
float SeqLength::dist(Feature *a){
    const SeqLength *other = static_cast<SeqLength*>(a);
    return static_cast<float>(std::abs(this->l - other->l));
}
int SeqLength::getL() const
{
return l;
}
// Distance between two ORF features: absolute difference of their coverage.
//   a : feature to compare with; treated as an ORF.
float ORF::dist(Feature *a){
    const ORF *other = static_cast<ORF*>(a);
    const float gap = std::abs(this->coverage - other->coverage);
    return gap;
}
unsigned int Kmer::getK() const
{
return k;
// Distance between two CodonBiases features: sum of the absolute differences
// of the four position scores (the composition values are not compared).
//   a : feature to compare with; treated as a CodonBiases.
float CodonBiases::dist(Feature *a){
    const CodonBiases *other = static_cast<CodonBiases*>(a);
    float total(0.0);
    int idx = 0;
    while(idx < 4){
        total += std::abs(this->position[idx] - other->position[idx]);
        ++idx;
    }
    return total;
}
std::unordered_map<std::string, float> Kmer::getKmer() const
{
return kmer;
}
// Other
bool ORF::in_start(const std::string &c){
for(auto const &codon: ORF::START){
if(c.compare(codon)==0){
return true;
}
int SeqLength::getL() const
{
return l;
}
unsigned int Kmer::getK() const
{
return k;
}
std::unordered_map<std::string, float> Kmer::getKmer() const
{
return kmer;
}
bool ORF::in_start(const std::string &c){
for(auto const &codon: ORF::START){
if(c.compare(codon)==0){
return true;
}
return false;
}
bool ORF::in_end(const std::string &c){
for(auto const &codon: ORF::END){
if(c.compare(codon)==0){
return true;
}
return false;
}
// Tells whether `c` is one of the codons listed in ORF::END.
//   c : candidate codon text.
// Returns true on an exact match with any entry, false otherwise.
bool ORF::in_end(const std::string &c){
    for(auto const &codon: ORF::END){
        if(c == codon){
            return true;
        }
        // No early `return false` here: every entry has to be compared,
        // not only the first one.
    }
    return false;
}
unsigned int ORF::getLength() const
{
return length;
}
unsigned int ORF::getLength() const
{
return length;
}
float ORF::getCoverage() const
{
return coverage;
}
float ORF::getCoverage() const
{
return coverage;
}
unsigned int ORF::getMax_length() const
{
return length;
}
unsigned int ORF::getMax_length() const
{
return length;
}
// Returns a read-only pointer to the first of the four position scores.
const float *CodonPosition::getPosition(){
    const float *first = this->position;
    return first;
}
// Returns a read-only pointer to the first of the four position scores.
const float *CodonBiases::getPosition(){
    const float *first = this->position;
    return first;
}
/*
/*
* Callable definition
*/
// Feature_creation specialisation that produces SeqLength features.
class SeqLength_callable: public feature::Feature_creation{
protected:
    // Builds a new SeqLength from `e`, which must be non-null and point to an
    // entry::Sequence. The reference form of dynamic_cast throws std::bad_cast
    // on a type mismatch, where the pointer form yielded a null pointer that
    // was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new SeqLength(seq);
    }
public:
    SeqLength_callable(){}
};
// Feature_creation specialisation that produces Kmer features of a fixed size k.
class Kmer_callable: public feature::Feature_creation{
protected:
    // k-mer size handed to every Kmer this callable builds.
    unsigned int k;
    // Builds a new Kmer from `e`, which must be non-null and point to an
    // entry::Sequence. The reference form of dynamic_cast throws std::bad_cast
    // on a type mismatch, where the pointer form yielded a null pointer that
    // was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new Kmer(seq,this->k);
    }
public:
    Kmer_callable(const unsigned int &k):k(k){}
};
// Feature_creation specialisation that produces ORF features.
class ORF_callable: public feature::Feature_creation{
protected:
    // Builds a new ORF from `e`, which must be non-null and point to an
    // entry::Sequence. The reference form of dynamic_cast throws std::bad_cast
    // on a type mismatch, where the pointer form yielded a null pointer that
    // was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new ORF(seq);
    }
public:
    ORF_callable(){}
};
// Feature_creation specialisation that produces CodonPosition features.
class CodonPosition_callable: public feature::Feature_creation{
protected:
    // Builds a new CodonPosition from `e`, which must be non-null and point to
    // an entry::Sequence. The reference form of dynamic_cast throws
    // std::bad_cast on a type mismatch, where the pointer form yielded a null
    // pointer that was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new CodonPosition(seq);
    }
public:
    CodonPosition_callable(){}
};
// Feature_creation specialisation that produces SeqLength features.
class SeqLength_callable: public feature::Feature_creation{
protected:
    // Builds a new SeqLength from `e`, which must be non-null and point to an
    // entry::Sequence. The reference form of dynamic_cast throws std::bad_cast
    // on a type mismatch, where the pointer form yielded a null pointer that
    // was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new SeqLength(seq);
    }
public:
    SeqLength_callable(){}
};
// Feature_creation specialisation that produces Kmer features of a fixed size k.
class Kmer_callable: public feature::Feature_creation{
protected:
    // k-mer size handed to every Kmer this callable builds.
    unsigned int k;
    // Builds a new Kmer from `e`, which must be non-null and point to an
    // entry::Sequence. The reference form of dynamic_cast throws std::bad_cast
    // on a type mismatch, where the pointer form yielded a null pointer that
    // was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new Kmer(seq,this->k);
    }
public:
    Kmer_callable(const unsigned int &k):k(k){}
};
// Feature_creation specialisation that produces ORF features.
class ORF_callable: public feature::Feature_creation{
protected:
    // Builds a new ORF from `e`, which must be non-null and point to an
    // entry::Sequence. The reference form of dynamic_cast throws std::bad_cast
    // on a type mismatch, where the pointer form yielded a null pointer that
    // was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new ORF(seq);
    }
public:
    ORF_callable(){}
};
// Feature_creation specialisation that produces CodonBiases features.
class CodonBiases_callable: public feature::Feature_creation{
protected:
    // Builds a new CodonBiases from `e`, which must be non-null and point to
    // an entry::Sequence. The reference form of dynamic_cast throws
    // std::bad_cast on a type mismatch, where the pointer form yielded a null
    // pointer that was then dereferenced.
    feature::Feature *create_feature(data::Data_basic *e){
        entry::Sequence &seq = dynamic_cast<entry::Sequence&>(*e);
        return new CodonBiases(seq);
    }
public:
    CodonBiases_callable(){}
};
// Returns a pointer to a newly allocated callable that builds SeqLength features.
callable::Callable<data::Data_basic*,data::Data_basic*> *SeqLength::get_callable(){
    feature::SeqLength_callable *maker = new feature::SeqLength_callable();
    return maker;
}
// Returns a pointer to a newly allocated callable that builds SeqLength features.
callable::Callable<data::Data_basic*,data::Data_basic*> *SeqLength::get_callable(){
    feature::SeqLength_callable *maker = new feature::SeqLength_callable();
    return maker;
}
// Returns a pointer to a newly allocated callable that builds Kmer features of size k.
callable::Callable<data::Data_basic*,data::Data_basic*> *Kmer::get_callable(const unsigned int &k){
    feature::Kmer_callable *maker = new feature::Kmer_callable(k);
    return maker;
}
// Returns a pointer to a newly allocated callable that builds Kmer features of size k.
callable::Callable<data::Data_basic*,data::Data_basic*> *Kmer::get_callable(const unsigned int &k){
    feature::Kmer_callable *maker = new feature::Kmer_callable(k);
    return maker;
}
// Returns a pointer to a newly allocated callable that builds ORF features.
callable::Callable<data::Data_basic*,data::Data_basic*> *ORF::get_callable(){
    feature::ORF_callable *maker = new feature::ORF_callable();
    return maker;
}
// Returns a pointer to a newly allocated callable that builds ORF features.
callable::Callable<data::Data_basic*,data::Data_basic*> *ORF::get_callable(){
    feature::ORF_callable *maker = new feature::ORF_callable();
    return maker;
}
// Returns a pointer to a newly allocated callable that builds CodonBiases features.
callable::Callable<data::Data_basic*,data::Data_basic*> *CodonBiases::get_callable(){
    feature::CodonBiases_callable *maker = new feature::CodonBiases_callable();
    return maker;
}
// Returns a pointer to a newly allocated callable that builds CodonPosition features.
callable::Callable<data::Data_basic*,data::Data_basic*> *CodonPosition::get_callable(){
    feature::CodonPosition_callable *maker = new feature::CodonPosition_callable();
    return maker;
}
// Out-of-line destructor with an empty body.
CodonBiases::~CodonBiases(){}
}
......
......@@ -10,161 +10,170 @@
#include "data/data_basic.h"
namespace feature{
// Abstract base of every feature: a data::Data_basic that additionally offers
// a distance to another feature, serialisation and cloning.
class Feature : public data::Data_basic{
protected:
// Copies this class's own state out of db; each subclass declares its own version.
virtual void init(const Data_basic *db);
public:
// Identifier string of the feature class (what getID() returns).
static const std::string TYPE;
// Presumably the kind of entry the feature is computed from -- TODO confirm.
static const std::string ENTRY_TYPE;
Feature(std::string name):data::Data_basic(name){}
// Copy-from-base constructor. A virtual call made inside a constructor resolves
// to this class's own init(), not to a subclass override.
Feature(const data::Data_basic *db):data::Data_basic(db){this->init(db);}
virtual ~Feature(){}
// Distance between this feature and a; defined by each subclass.
virtual float dist(feature::Feature *a)=0;
// Serialisation hooks; pure here, defined by each subclass.
std::string to_dict()=0;
std::string to_csv()=0;
virtual std::string getID(){
return this->TYPE;
}
virtual data::Data_basic *clone()=0;
std::string getEntry_name() const;
};
// Feature holding the length of a sequence.
class SeqLength: public feature::Feature{
protected:
// Stored sequence length.
int l;
void init(const Data_basic *db);
public:
static const std::string TYPE;
static const std::string ENTRY_TYPE;
// Factory for the callable that builds SeqLength features.
static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable();
int getL() const;
// Takes the name and the length directly from the sequence.
SeqLength(const entry::Sequence &s):
feature::Feature(s.getName()),l(s.getLength()){}
// Builds the feature from an explicit name and length.
SeqLength(const std::string &name,const int &seql):
feature::Feature(name),l(seql){}
// Copies from db through init().
SeqLength(const data::Data_basic *db):feature::Feature(db){this->init(db);}
~SeqLength(){}
float dist(Feature *a);
data::Data_basic *clone();
std::string to_dict();
std::string to_csv();
std::string getID(){
return this->TYPE;
}
};
// Feature holding a table of k-mer values for one sequence.
class Kmer: public feature::Feature{
protected:
// k-mer size.
unsigned int k;
// k-mer text -> value.
std::unordered_map<std::string,float> kmer;
void init(const Data_basic *db);
public:
static const std::string TYPE;
static const std::string ENTRY_TYPE;
// Factory for the callable that builds Kmer features of size k.
static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable(const unsigned int &k);
// Computes the table from a sequence (defined outside this header).
Kmer(entry::Sequence &s, const unsigned int &k);
// Builds the feature from an explicit name, k and table.
Kmer(const std::string &name,
const unsigned int &k, const std::unordered_map<std::string,float> kmer):
feature::Feature(name),
k(k),kmer(kmer){}
// Copies from db through init().
Kmer(const data::Data_basic *db):feature::Feature(db){this->init(db);}
~Kmer(){}
float dist(feature::Feature *a);
unsigned int getK() const;
// Returns the table by value.
std::unordered_map<std::string, float> getKmer() const;
data::Data_basic *clone();
std::string to_dict();
std::string to_csv();
std::string getID(){
return this->TYPE;
}
};
class ORF: public feature::Feature{
protected:
static const std::vector<std::string> START;
static const std::vector<std::string> END;
static bool in_start(const std::string &c);
static bool in_end(const std::string &c);
unsigned int length;
float coverage;
/*
// Abstract base of every feature: a data::Data_basic that additionally offers
// a distance to another feature, serialisation and cloning.
class Feature : public data::Data_basic{
protected:
// Copies this class's own state out of db; each subclass declares its own version.
virtual void init(const Data_basic *db);
public:
// Identifier string of the feature class (what getID() returns).
static const std::string TYPE;
// Presumably the kind of entry the feature is computed from -- TODO confirm.
static const std::string ENTRY_TYPE;
Feature(std::string name):data::Data_basic(name){}
// Copy-from-base constructor. A virtual call made inside a constructor resolves
// to this class's own init(), not to a subclass override.
Feature(const data::Data_basic *db):data::Data_basic(db){this->init(db);}
virtual ~Feature(){}
// Distance between this feature and a; defined by each subclass.
virtual float dist(feature::Feature *a)=0;
// Serialisation hooks; pure here, defined by each subclass.
std::string to_dict()=0;
std::string to_csv()=0;
virtual std::string getID(){
return this->TYPE;
}
virtual data::Data_basic *clone()=0;
std::string getEntry_name() const;
};
// Feature holding the length of a sequence.
class SeqLength: public feature::Feature{
protected:
// Stored sequence length.
int l;
void init(const Data_basic *db);
public:
static const std::string TYPE;
static const std::string ENTRY_TYPE;
// Factory for the callable that builds SeqLength features.
static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable();
int getL() const;
// Takes the name and the length directly from the sequence.
SeqLength(const entry::Sequence &s):
feature::Feature(s.getName()),l(s.getLength()){}
// Builds the feature from an explicit name and length.
SeqLength(const std::string &name,const int &seql):
feature::Feature(name),l(seql){}
// Copies from db through init().
SeqLength(const data::Data_basic *db):feature::Feature(db){this->init(db);}
~SeqLength(){}
float dist(Feature *a);
data::Data_basic *clone();
std::string to_dict();
std::string to_csv();
std::string getID(){
return this->TYPE;
}
};
// Feature holding a table of k-mer values for one sequence.
class Kmer: public feature::Feature{
protected:
// k-mer size.
unsigned int k;
// k-mer text -> value.
std::unordered_map<std::string,float> kmer;
void init(const Data_basic *db);
public:
static const std::string TYPE;
static const std::string ENTRY_TYPE;
// Factory for the callable that builds Kmer features of size k.
static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable(const unsigned int &k);
// Computes the table from a sequence (defined outside this header).
Kmer(entry::Sequence &s, const unsigned int &k);
// Builds the feature from an explicit name, k and table.
Kmer(const std::string &name,
const unsigned int &k, const std::unordered_map<std::string,float> kmer):
feature::Feature(name),
k(k),kmer(kmer){}
// Copies from db through init().
Kmer(const data::Data_basic *db):feature::Feature(db){this->init(db);}
~Kmer(){}
float dist(feature::Feature *a);
unsigned int getK() const;
// Returns the table by value.
std::unordered_map<std::string, float> getKmer() const;
data::Data_basic *clone();
std::string to_dict();
std::string to_csv();
std::string getID(){
return this->TYPE;
}
};
// Open-reading-frame feature of a sequence: ORF length and coverage plus the
// mean and deviation of the start- and end-codon positions.
class ORF: public feature::Feature{
protected:
    // Codon lists consulted by in_start() / in_end().
    static const std::vector<std::string> START;
    static const std::vector<std::string> END;
    static bool in_start(const std::string &c);
    static bool in_end(const std::string &c);
    // Default member initialisers: the (name, length, coverage) constructor
    // does not set the four position statistics, yet to_dict() and to_csv()
    // read them, so they must never be left indeterminate.
    unsigned int length = 0;
    float coverage = 0;
    float start_mean = 0;
    float start_std = 0;
    float end_mean = 0;
    float end_std = 0;
    /*
    float start_center, start_std;
    float end_center, end_std;
    */
    void init(const Data_basic *db);
public:
    static const std::string TYPE;
    static const std::string ENTRY_TYPE;
    // Factory for the callable that builds ORF features.
    static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable();
    ~ORF(){}
    // Computes the statistics from a sequence (defined outside this header).
    ORF(const entry::Sequence &s);
    // Builds the feature from an explicit name, length and coverage; the
    // position statistics keep their default of 0.
    ORF(const std::string &name,
        const unsigned int &length, const float &coverage):
        feature::Feature(name),
        length(length),coverage(coverage){}
    // Copies from db through init().
    ORF(const data::Data_basic *db):feature::Feature(db){this->init(db);}
    float dist(feature::Feature *a);
    data::Data_basic *clone();
    std::string to_dict();
    std::string to_csv();
    std::string getID(){
        return this->TYPE;
    }
    float getCoverage() const;
    // NOTE(review): the four getters below are only declared here; confirm
    // that matching definitions exist.
    float getStart_center() const;
    float getStart_std() const;
    float getEnd_center() const;
    float getEnd_std() const;
    unsigned int getMax_length() const;
    unsigned int getLength() const;
};
// Feature holding four per-nucleotide codon-position scores.
class CodonPosition: public feature::Feature{
protected:
// One score per nucleotide; to_dict() labels them A, C, T, G in that order.
float position[4];
void init(const Data_basic *db);
public:
static const std::string TYPE;
static const std::string ENTRY_TYPE;
// Factory for the callable that builds CodonPosition features.
static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable();
~CodonPosition(){}
// Computes the scores from a sequence (defined outside this header).
CodonPosition(const entry::Sequence &s);
// Builds the feature from an explicit name and score array.
CodonPosition(const std::string &name,
const float (&position)[4]):
feature::Feature(name){
std::copy(std::begin(position),std::end(position),std::begin(this->position));
}
// Copies from db through init().
CodonPosition(const data::Data_basic *db):feature::Feature(db){this->init(db);}
const float *getPosition();
float dist(Feature *a);
data::Data_basic *clone();
std::string to_dict();
std::string to_csv();
std::string getID(){
return this->TYPE;
}
};
void init(const Data_basic *db);
public:
static const std::string TYPE;
static const std::string ENTRY_TYPE;
static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable();
~ORF(){}
ORF(const entry::Sequence &s);
ORF(const std::string &name,
const unsigned int &length, const float &coverage):
feature::Feature(name),
length(length),coverage(coverage){}
ORF(const data::Data_basic *db):feature::Feature(db){this->init(db);}
float dist(feature::Feature *a);
data::Data_basic *clone();
std::string to_dict();
std::string to_csv();
std::string getID(){
return this->TYPE;
}
float getCoverage() const;
float getStart_center() const;
float getStart_std() const;
float getEnd_center() const;
float getEnd_std() const;
unsigned int getMax_length() const;
unsigned int getLength() const;
};
// Feature holding four per-nucleotide codon-position scores and four
// per-nucleotide composition values.
class CodonBiases: public feature::Feature{
protected:
// One value per nucleotide; to_dict() labels them A, C, T, G in that order.
float position[4] = {0,0,0,0};
float composition[4] = {0,0,0,0};
void init(const Data_basic *db);
public:
static const std::string TYPE;
static const std::string ENTRY_TYPE;
// Factory for the callable that builds CodonBiases features.
static callable::Callable<data::Data_basic*,data::Data_basic*> *get_callable();
~CodonBiases();
// Computes both arrays from a sequence (defined outside this header).
CodonBiases(const entry::Sequence &s);
// Builds the feature from an explicit name and the two arrays.
CodonBiases(
const std::string &name,
const float (&position)[4],
const float (&composition)[4]):
feature::Feature(name){
std::copy(std::begin(position),std::end(position),std::begin(this->position));
std::copy(std::begin(composition),std::end(composition),std::begin(this->composition));
}
// Copies from db through init().
CodonBiases(const data::Data_basic *db):feature::Feature(db){this->init(db);}
const float *getPosition();
float dist(Feature *a);
data::Data_basic *clone();
std::string to_dict();
std::string to_csv();
std::string getID(){
return this->TYPE;
}
};
}
#endif // FEATURE_H
......
......@@ -57,10 +57,10 @@ int main(int argc, char* argv[])
ds["KMER3"] = tmp_ds;
create_feature_maker(kmer,feature::Kmer::TYPE+std::to_string(6),tmp_ds,&re);
kmer = static_cast<feature::Feature_callable*>(feature::Kmer::get_callable(6));
tmp_ds = new data::Data_Store_Map(output+"KMER6.txt");
ds["KMER6"] = tmp_ds;
create_feature_maker(kmer,feature::Kmer::TYPE+std::to_string(6),tmp_ds,&re);
// kmer = static_cast<feature::Feature_callable*>(feature::Kmer::get_callable(6));
// tmp_ds = new data::Data_Store_Map(output+"KMER6.txt");
// ds["KMER6"] = tmp_ds;
// create_feature_maker(kmer,feature::Kmer::TYPE+std::to_string(6),tmp_ds,&re);
/* *********** ORF *********************/
/* ORF callable */
......@@ -70,12 +70,12 @@ int main(int argc, char* argv[])
ds["ORF"] = tmp_ds;
create_feature_maker(orf,feature::ORF::TYPE,tmp_ds,&re);
/* *********** Codon Position ***************/
/* *********** Codon Biases ***************/
feature::Feature_callable *cp = static_cast<feature::Feature_callable*>(feature::CodonPosition::get_callable());
tmp_ds = new data::Data_Store_Map(output+"CP.txt");
ds["CP"] = tmp_ds;
create_feature_maker(cp,feature::CodonPosition::TYPE,tmp_ds,&re);
feature::Feature_callable *cb = static_cast<feature::Feature_callable*>(feature::CodonBiases::get_callable());
tmp_ds = new data::Data_Store_Map(output+"CB.txt");
ds["CB"] = tmp_ds;
create_feature_maker(cb,feature::CodonBiases::TYPE,tmp_ds,&re);
/* *********** Fasta reader *************/
......