// fasta.h (2.5 KB)
#ifndef FASTA_H
#define FASTA_H

#include <exception>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include "../multithread/buffer.h"
#include "../multithread/buffer_producer.h"
#include "../Importer/entry.h"

#include "../util.h"

/**
 * @file fasta.h
 * @brief The Fasta class definition
 * @author Ludovic Platon
 */

/**
 * @namespace import
 *
 * This namespace contains all the classes used for data import.
 */

namespace import {

	/**
	 * @brief Exception reporting a path that could not be used.
	 * @class unknown_path
	 *
	 * The message returned by what() is the literal prefix "Unknown path :"
	 * immediately followed by the path given to the constructor.
	 */
	class unknown_path : public std::exception{
			std::string error_string;
		public:
			/**
			 * @brief Build the exception for the given path.
			 * @param path a std::string, the offending path (may be empty)
			 */
			unknown_path(std::string path):error_string("Unknown path :"+std::move(path)){}
			virtual ~unknown_path() noexcept {}
			/**
			 * @brief Describe the error.
			 * @return a NUL-terminated message owned by this exception object;
			 *         it stays valid for as long as the exception is alive
			 */
			virtual const char *what() const noexcept override{
				return error_string.c_str();
			}
	};
	/**
	 * @brief The Fasta class
	 * @class Fasta
	 *
	 * Producer that reads sequences from a FASTA file and hands every
	 * accepted record to push() as a newly allocated entry::Sequence.
	 *
	 * Parsing rules applied by work_out():
	 *  - a line starting with '>' opens a new record; its identifier is the
	 *    first space-separated token found after the '>';
	 *  - an empty line closes the current record;
	 *  - any other line is appended to the current sequence, unless it
	 *    contains a space, in which case the whole record is discarded;
	 *  - a record is emitted only when its sequence is strictly longer than
	 *    length_min characters;
	 *  - a trailing carriage return (CRLF files) is removed from each line.
	 */
	class Fasta : public multithread::Buffer_producer<data::Data_basic*>{
			std::string file;
			unsigned int length_min;
		protected:
			/**
			 * @brief Read the whole file and push every valid record.
			 *
			 * NOTE(review): presumably the work routine invoked by
			 * multithread::Buffer_producer — confirm against the base class.
			 *
			 * @throws unknown_path if the file cannot be opened for reading
			 */
			void work_out(){
				std::cout << "Start fasta reader : " << this->file << std::endl;
				std::ifstream input(this->file);
				if(!input.good()){
					throw unknown_path(this->file);
				}

				std::string line;
				std::string id;
				std::string sequence;
				std::vector<std::string> string_split;

				// Emit the record accumulated so far (if it qualifies) and reset
				// the state, so that the same record can never be pushed twice.
				auto flush_record = [&](){
					if(!id.empty() && sequence.size() > this->length_min){
						// NOTE(review): ownership of the allocated Sequence is
						// assumed to be taken by whoever consumes the buffer.
						this->push(new entry::Sequence(id,sequence));
					}
					id.clear();
					sequence.clear();
				};

				// Testing the stream itself (rather than good()) keeps the last
				// line even when the file does not end with a newline.
				while(std::getline(input,line)){
					// Files with Windows line endings leave a '\r' behind.
					if(!line.empty() && line.back() == '\r'){
						line.pop_back();
					}

					if(line.empty()){
						// A blank line terminates the current record.
						flush_record();
					}else if(line[0] == '>'){
						// A header terminates the previous record and starts a new one.
						flush_record();
						tokenize<std::vector<std::string>>(line.substr(1),string_split," ",true);
						// A header without any token (e.g. a bare ">") yields no
						// identifier; the lines that follow it are then skipped.
						if(!string_split.empty()){
							id = string_split[0];
						}
						string_split.clear();
					}else if(!id.empty()){
						if( line.find(' ') != std::string::npos ){
							// no space in sequence: drop the whole record
							id.clear();
							sequence.clear();
						} else {
							sequence += line;
						}
					}
				}
				// The last record is not followed by a header or a blank line.
				flush_record();
				std::cout << "End fasta reader" << std::endl;
			}

		public:
			/**
			 * @brief Fasta constructor
			 * @param out a pointer to the buffer handed to the Buffer_producer base
			 * @param file a std::string, a path to a fasta file
			 * @param length_min an unsigned int, only sequences strictly longer
			 *        than this value are emitted (defaults to 5)
			 */
			Fasta(multithread::Buffer<data::Data_basic*> *out,std::string file,unsigned int length_min = 5):
				multithread::Buffer_producer<data::Data_basic*>(out),file(std::move(file)),length_min(length_min){}
	};
}


#endif // FASTA_H