fasta.h
2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#ifndef FASTA_H
#define FASTA_H
#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include "../multithread/buffer.h"
#include "../multithread/buffer_producer.h"
#include "../Importer/entry.h"
#include "../util.h"
/**
* @file fasta.h
* @brief The Fasta class definition
* @author Ludovic Platon
*/
/**
* @namespace import
*
* This namespace contains all the classes used for data import.
*/
namespace import {
/**
 * @brief The unknown_path class
 * @class unknown_path
 *
 * Exception carrying the message "Unknown path :" followed by the offending
 * path. The Fasta reader throws it when its input file cannot be opened.
 */
class unknown_path : public std::exception{
    std::string error_string;
public:
    /**
     * @brief unknown_path constructor
     * @param path a std::string, the path that could not be opened
     */
    unknown_path(std::string path):error_string("Unknown path :"+path){}

    // noexcept replaces the dynamic exception specification throw(), which is
    // deprecated since C++11 and removed in C++20.
    ~unknown_path() noexcept override {}

    /**
     * @brief what
     * @return the error message; the pointer stays valid for as long as this
     *         exception object is alive and unmodified
     */
    const char *what() const noexcept override {
        return error_string.c_str();
    }
};
/**
 * @brief The Fasta class
 * @class Fasta
 *
 * Buffer producer that reads a FASTA file and pushes one entry::Sequence per
 * accepted record.
 *
 * A record starts at a header line ('>' followed by the identifier) and ends
 * at the next header line, at a blank line, or at the end of the file. Only
 * the first token of the header is kept as identifier. A record is pushed only
 * when its sequence is strictly longer than length_min. A record with a space
 * in one of its sequence lines is discarded.
 */
class Fasta : public multithread::Buffer_producer<data::Data_basic*>{
    std::string file;
    unsigned int length_min;

    /**
     * @brief Push the pending record if it is acceptable, then reset it
     * @param id a std::string, the identifier of the pending record
     * @param sequence a std::string, the sequence accumulated so far
     *
     * Both strings are always cleared, so a record can never be pushed twice
     * (previously a blank line pushed the record without resetting it, and
     * the same record was pushed again at the next header or at end of file).
     */
    void flush_record(std::string &id,std::string &sequence){
        if(!id.empty() && sequence.size() > this->length_min){
            // NOTE(review): the raw pointer is handed to push(); presumably
            // the consumer of the buffer takes ownership - confirm.
            this->push(new entry::Sequence(id,sequence));
        }
        id.clear();
        sequence.clear();
    }
protected:
    /**
     * @brief Read the fasta file and push every accepted sequence
     * @throws unknown_path if the file cannot be opened
     */
    void work_out(){
        std::cout << "Start fasta reader : " << this->file << std::endl;
        std::ifstream input(this->file);
        if(!input.good()){
            throw unknown_path(this->file);
        }

        std::string line;
        std::string id;
        std::string sequence;
        std::vector<std::string> string_split;

        // Test the stream state itself and not .good(): when the last line has
        // no trailing newline, getline reads it successfully but sets eofbit,
        // so .good() would silently drop that line.
        while(std::getline(input,line)){
            // Files with CRLF line endings leave a '\r' at the end of the line.
            if(!line.empty() && line.back() == '\r'){
                line.pop_back();
            }

            if(line.empty() || line[0] == '>'){
                // A blank line or a new header terminates the pending record.
                flush_record(id,sequence);
                if(!line.empty()){
                    // Keep only the first token of the header as identifier.
                    // NOTE(review): tokenize is declared outside this file;
                    // the last argument presumably drops empty tokens - confirm.
                    tokenize<std::vector<std::string>>(line.substr(1),string_split," ",true);
                    // A bare ">" header may yield no token: leave id empty so
                    // the following sequence lines are ignored.
                    if(!string_split.empty()){
                        id = string_split[0];
                    }
                    string_split.clear();
                }
            }else if(!id.empty()){
                if( line.find(' ') != std::string::npos ){
                    // no space in sequence: drop the whole record
                    id.clear();
                    sequence.clear();
                } else {
                    sequence += line;
                }
            }
        }
        // The last record is not followed by a header line.
        flush_record(id,sequence);
        std::cout << "End fasta reader" << std::endl;
    }
public:
    /**
     * @brief Fasta constructor
     * @param out a multithread::Buffer pointer, forwarded to the
     *        Buffer_producer base class
     * @param file a std::string, a path to a fasta file
     *
     * The minimal sequence length (length_min) is set to 5.
     */
    Fasta(multithread::Buffer<data::Data_basic*> *out,std::string file):
        multithread::Buffer_producer<data::Data_basic*>(out),file(std::move(file)),length_min(5){}
};
}
#endif // FASTA_H