// utils.cpp
#include "utils.h"

#include <sys/stat.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <dlib/clustering.h>

/* Return the index of the first element of listBp equal to p,
 * or -1 when listBp does not contain p. */
int find_bp(std::vector < std::pair < unsigned int, unsigned int > > listBp, std::pair< unsigned int, unsigned int > p)
{
    for (size_t pos = 0, total = listBp.size(); pos != total; ++pos) {
        if (listBp[pos] == p)
            return static_cast<int>(pos);
    }
    return -1;
}

/* Collect, in increasing order, the indices of every pair of listBp
 * that has i as its first or as its second member. */
std::vector < unsigned int > find_bp_with_i (std::vector < std::pair < unsigned int, unsigned int > > listBp, unsigned int i)
{
    std::vector < unsigned int > hits;
    unsigned int pos = 0;
    for (const std::pair < unsigned int, unsigned int > &bp : listBp) {
        const bool touchesI = (bp.first == i) || (bp.second == i);
        if (touchesI)
            hits.push_back(pos);
        ++pos;
    }
    return hits;
}

/* Overload working on pairs of pairs: collect, in increasing order, the
 * indices of every element of listBp whose first or second member equals i. */
std::vector< int > find_bp_with_i(
        std::vector < std::pair < std::pair < uint, uint >, std::pair < uint, uint > > > listBp,
        std::pair < uint, uint > i) {
    std::vector < int > hits;
    int pos = 0;
    for (const std::pair < std::pair < uint, uint >, std::pair < uint, uint > > &entry : listBp) {
        const bool touchesI = (entry.first == i) || (entry.second == i);
        if (touchesI)
            hits.push_back(pos);
        ++pos;
    }
    return hits;
}

/* Tell whether at least one pair of v has i as its first member. */
bool find_i_at_first(std::vector < std::pair < unsigned int, unsigned int > > v, unsigned int i )
{
    return std::any_of(v.begin(), v.end(),
            [i](const std::pair < unsigned int, unsigned int > &bp) {
                return bp.first == i;
            });
}

/* Tell whether at least one pair of v has i as its second member. */
bool find_i_at_second(std::vector < std::pair < unsigned int, unsigned int > > v, unsigned int i )
{
    return std::any_of(v.begin(), v.end(),
            [i](const std::pair < unsigned int, unsigned int > &bp) {
                return bp.second == i;
            });
}

/* Return the index of the first pair of v whose first member is i,
 * or -1 when no such pair exists. */
int find_i_at_first_int(std::vector < std::pair < unsigned int, unsigned int > > v, unsigned int i )
{
    const auto hit = std::find_if(v.begin(), v.end(),
            [i](const std::pair < unsigned int, unsigned int > &bp) {
                return bp.first == i;
            });
    if (hit == v.end())
        return -1;
    return static_cast<int>(std::distance(v.begin(), hit));
}

/* Return the index of the first pair of v whose second member is i,
 * or -1 when no such pair exists. */
int find_i_at_second_int(std::vector < std::pair < unsigned int, unsigned int > > v, unsigned int i )
{
    const auto hit = std::find_if(v.begin(), v.end(),
            [i](const std::pair < unsigned int, unsigned int > &bp) {
                return bp.second == i;
            });
    if (hit == v.end())
        return -1;
    return static_cast<int>(std::distance(v.begin(), hit));
}



/* Tell whether the ordered pair of characters (c1, c2) is one of the six
 * accepted pairs: AU, UA, UG, GU, GC, CG.
 * The comparison is case-sensitive; any other combination yields false.
 * The characters are compared directly, so no stream or string is built. */
bool app(const char c1, const char c2)
{
    switch (c1) {
        case 'A':
            return c2 == 'U';
        case 'U':
            return c2 == 'A' or c2 == 'G';
        case 'G':
            return c2 == 'U' or c2 == 'C';
        case 'C':
            return c2 == 'G';
        default:
            return false;
    }
}

// Check if a string is an integer >= 0 and <= 100.
//
// Returns true when the value parsed by std::stoi lies in [0, 100], false
// when it parses but falls outside that range.
// Throws std::string("Invalid confidence index.") when std::stoi cannot parse
// ic (no leading number, or value out of the int range). std::stoi reports
// those failures as std::invalid_argument / std::out_of_range, both derived
// from std::logic_error, so that is what must be caught and translated.
// Note: std::stoi parses a leading number only, so input such as "42abc" is
// accepted as 42.
bool checkIC(std::string ic) {
    int value = 0;
    try {
        value = std::stoi(ic);
    } catch (const std::logic_error &) {
        throw (std::string("Invalid confidence index."));
    }
    return value >= 0 and value <= 100;
}

/* Compare two objective vectors.
 * return 1 if c1 dominates c2,
 * return 0 if c1 is not comparable to c2
 * return -1 if c1 is dominated by c2
 *
 * Dominance is strict here: c1 dominates c2 only when it is strictly better
 * on every objective (and symmetrically for -1). c2 is indexed with the
 * indices of c1, so it must hold at least c1.size() values. */
int dominate(std::vector < float > c1, std::vector < float > c2) {
    /* first objective : max constraint
     * second objective : max compatibility
     * third objective : min energy
     * fourth objective : max probing data score */

    const size_t nObjectives = c1.size();
    size_t nBetter = 0;
    size_t nWorse = 0;

    for (size_t k = 0; k != nObjectives; ++k) {
        // Objective 2 (energy) is minimised: swapping the operands lets the
        // same "greater is better" comparison serve every objective.
        const bool minimised = (k == 2);
        const float candidate = minimised ? c2[k] : c1[k];
        const float reference = minimised ? c1[k] : c2[k];

        if (candidate > reference)
            ++nBetter;
        else if (candidate < reference)
            ++nWorse;
    }

    if (nBetter == nObjectives)
        return 1;
    if (nWorse == nObjectives)
        return -1;
    return 0;
}

/* Read the sparse feature vectors stored in the file "graphNSPDK.txt.feature"
 * (looked up in the current working directory) and append them to `features`.
 *
 * Every non-empty line of the file describes one vector as a list of
 * "id:value" tokens separated by spaces or tabs. Only the features whose id
 * is present on every line are kept; for each line, the values of those
 * common features are stored in a single-column matrix that is pushed at the
 * back of `features`.
 *
 * Blank lines and tokens without a ':' separated value are skipped. When the
 * file cannot be stat'ed or contains no data line, `features` is left
 * untouched. Exceptions raised by std::stoi / std::stof on non-numeric tokens
 * are not caught here.
 *
 * NOTE(review): std::set_intersection requires sorted ranges, so the ids are
 * assumed to appear in ascending order on each line - confirm against the
 * tool that writes the file. */
void readFeatureVector(std::vector < dlib::matrix < float > > &features){

    std::vector < std::vector < int > > featureIds;
    std::vector < std::vector < float > > featureValues;
    std::vector < int > commonFeatureIds, intersec;

    std::string name = "graphNSPDK.txt.feature", line = "";
    std::vector < std::string > elements, elements2;
    struct stat buf;
    if( (stat(name.c_str(), &buf) == 0)) {
        std::ifstream ifs(name);
        while (std::getline(ifs, line)) {
            // std::getline strips the '\n', so a blank line is an empty string.
            if (line.empty())
                continue;

            // Rows are addressed through back(): a line counter would run
            // ahead of the vectors as soon as a blank line is skipped.
            featureIds.push_back(std::vector < int >());
            featureValues.push_back(std::vector < float >());
            boost::split( elements, line, boost::is_any_of(" \t"), boost::token_compress_on );
            uint count = 0;
            for(size_t i = 0, size = elements.size(); i != size; i++) {
                if (elements[i].empty())
                    continue;
                boost::split(elements2, elements[i], boost::is_any_of(":"), boost::token_compress_on);
                // A token without "id:value" shape has no second part to read.
                if (elements2.size() < 2)
                    continue;
                featureIds.back().push_back(std::stoi(elements2[0]));
                featureValues.back().push_back(std::stof(elements2[1]));
                count++;
            }
            std::cout << " count = " << count << std::endl;
        }
    }

    // Nothing was read (missing or empty file): there is no first row to
    // start the intersection from.
    if (featureIds.empty())
        return;

    // Intersection of all the feature ids
    commonFeatureIds = featureIds[0];
    for (size_t i = 1, size = featureIds.size(); i != size; i++) {
        std::set_intersection(commonFeatureIds.begin(), commonFeatureIds.end(),
                          featureIds[i].begin(), featureIds[i].end(),
                          std::back_inserter(intersec));
        commonFeatureIds = intersec;
        intersec.clear();
    }
    std::cout  << "commonFeatureIds.size = " << commonFeatureIds.size() << std::endl;

    // Recover only the common features: walk each row once and compact, in
    // place, the entries that match the next expected common id. The walk
    // stops as soon as every common id has been matched, so commonFeatureIds
    // is never read past its end.
    for(size_t i = 0, size = featureIds.size(); i != size; i++) {
        std::vector < int > &ids = featureIds[i];
        std::vector < float > &values = featureValues[i];
        size_t kept = 0;
        for (size_t j = 0, size2 = ids.size(); j != size2 and kept != commonFeatureIds.size(); j++) {
            if (ids[j] == commonFeatureIds[kept]) {
                ids[kept] = ids[j];
                values[kept] = values[j];
                kept++;
            }
        }
        ids.resize(kept);
        values.resize(kept);
    }

    // Put the common features into a matrix
    dlib::matrix<float>  feature;
    feature.set_size(long(commonFeatureIds.size()), 1);    // nrow = x and only one column
    for(size_t i = 0, size = featureValues.size(); i != size; i++) {
        for (size_t j = 0, size2 = featureValues[i].size(); j != size2; j++) {
            feature(long(j)) = featureValues[i][j];
        }
        features.push_back(feature);
    }
}

/* Resize K to length x length and fill it from the file
 * "graphNSPDK.txt.kernel" (looked up in the current working directory).
 *
 * Each non-empty line of the file provides one row of K as numbers separated
 * by spaces or tabs. Blank lines are skipped and do not consume a row. Empty
 * tokens (e.g. produced by leading whitespace) do not consume a column.
 * Rows and columns beyond `length` are ignored so that K is never written
 * out of bounds.
 *
 * Entries of K for which the file provides no value are not assigned by this
 * function. When the file cannot be stat'ed, K is only resized. Exceptions
 * raised by std::stof on non-numeric tokens are not caught here.
 *
 * NOTE(review): values are parsed with std::stof and then widened to double,
 * as in the original code; std::stod would keep more precision - confirm
 * whether that is wanted before changing it. */
void readKernelMatrix(dlib::matrix < double > &K, uint length) {

    K.set_size(length, length);
    std::string name = "graphNSPDK.txt.kernel", line = "";
    std::vector < std::string > elements;
    struct stat buf;
    const long limit = long(length);
    if( (stat(name.c_str(), &buf) == 0)) {
        std::ifstream ifs(name);
        long row = 0;
        while (row < limit and std::getline(ifs, line)) {
            // std::getline strips the '\n', so a blank line is an empty string.
            if (line.empty())
                continue;

            boost::split( elements, line, boost::is_any_of(" \t"), boost::token_compress_on );

            // The column index only advances on real values, so an empty
            // leading token does not shift the row to the right.
            long col = 0;
            for(size_t i = 0, size = elements.size(); i != size and col < limit; i++) {
                if (elements[i].empty())
                    continue;
                K(row, col) = double(std::stof(elements[i]));
                col++;
            }
            row++;
        }
    }
}

/* Concatenate the strings of v, inserting the delimiter d between two
 * consecutive elements (no leading or trailing delimiter).
 * Returns an empty string when v is empty. */
std::string join(const std::vector < std::string >& v, std::string d) {
  std::string s;

  for (size_t i = 0, size = v.size(); i != size; i++) {
      // The delimiter goes before every element except the first one.
      if (i != 0){
          s += d;
      }
      s += v[i];
  }
  return s;
}