proteinortho  6.0.14
proteinortho_clustering.h File Reference
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <list>
#include <map>
#include <algorithm>
#include <cmath>
#include <vector>
#include <stack>
#include <iomanip>
#include <cstdlib>
#include <ctime>
#include <memory>
#include <climits>

Go to the source code of this file.

Classes

class  ConnectedComponent
 
struct  wedge
 
struct  protein
 
struct  compare_ConnectedComponents
 
struct  protein_degree
 

Macros

#define floatprecision_H   1
 

Typedefs

using floattype = float
 

Functions

void ssyevr_ (char *jobz, char *range, char *uplo, int *n, float *a, int *lda, float *vl, float *vu, int *il, int *iu, float *abstol, int *m, float *w, float *z, int *ldz, int *isuppz, float *work, int *lwork, int *iwork, int *liwork, int *info)
 
void dsyevr_ (char *jobz, char *range, char *uplo, int *n, double *a, int *lda, double *vl, double *vu, int *il, int *iu, double *abstol, int *m, double *w, double *z, int *ldz, int *isuppz, double *work, int *lwork, int *iwork, int *liwork, int *info)
 
template<class T >
void dssyevr_ (char *jobz, char *range, char *uplo, int *n, T *a, int *lda, T *vl, T *vu, int *il, int *iu, T *abstol, int *m, T *w, T *z, int *ldz, int *isuppz, T *work, int *lwork, int *iwork, int *liwork, int *info)
 
ConnectedComponent BFS (vector< bool > *done, unsigned int cur_node)
 
size_t getPeakRSS ()
 
size_t getCurrentRSS ()
 
void printHelp ()
 
unsigned int numberOfNodesToMemoryUsageLaplacian_inKB (unsigned int n)
 
unsigned int BFS_not_critical (vector< unsigned int > *done_withBacktrack, unsigned int start_node, unsigned int end_node)
 
bool criticalHeuristic (ConnectedComponent *cur_cc)
 
void partition_graph ()
 
void clear_edges (vector< unsigned int > &nodes)
 
void removeExternalEdges (map< unsigned int, bool > &a)
 
void remove_edge_index (const unsigned int node_id, const unsigned int index)
 
void parse_file (string file)
 
void sort_species (void)
 
void stats (floattype i, floattype size)
 
void print_header ()
 
void print_group (vector< unsigned int > &nodes, floattype connectivity)
 
floattype calc_group (vector< unsigned int > &nodes)
 
floattype string2floattype (string str)
 
void tokenize (const string &str, vector< string > &tokens, const string &delimiters="\t")
 
unsigned int max_of_diag (vector< unsigned int > &nodes, vector< unsigned int > &diag)
 
vector< floattype > generate_random_vector (const unsigned int size)
 
vector< floattype > get_new_x (vector< floattype > x, vector< unsigned int > &nodes, map< unsigned int, unsigned int > &mapping, bool isWeighted)
 
vector< floattype > makeOrthogonal (vector< floattype > x)
 
vector< floattype > normalize (vector< floattype > x, floattype *length)
 
vector< floattype > getY (floattype max_degree, vector< floattype > x_hat, vector< floattype > x_new, vector< unsigned int > &nodes, vector< unsigned int > &diag)
 
unsigned int sumOutDegs (const vector< unsigned int > &nodes)
 
floattype getConnectivity (vector< unsigned int > &nodes, bool useLapack)
 
bool comparator_pairfloattypeUInt (const pair< floattype, unsigned int > &l, const pair< floattype, unsigned int > &r)
 
void splitGroups (vector< floattype > &y, vector< unsigned int > &nodes, bool useLapack)
 
string getTime (void)
 
bool test__max_of_diag ()
 
bool test__generate_random_vector ()
 
bool test__get_new_x ()
 
bool test__makeOrthogonal ()
 
bool test__normalize ()
 
bool test__getY ()
 
bool test__splitGroups ()
 

Variables

bool param_verbose = false
 By default no verbose output is printed. More...
 
floattype param_con_threshold = 0.1
 as a reference: a chain a-b-c-d has 0.25 More...
 
unsigned int debug_level = 0
 Debug stderr level. More...
 
floattype param_sep_purity = 1e-7
 as a reference: a-b-c will give +/-0.707107 and 2.34857e-08 More...
 
unsigned int param_max_nodes = 16777216
 = 2^24. The absolute maximal number of nodes for a connected component. If exceeded then it is skipped entirely. More...
 
floattype param_min_species = 1
 Minimum number of species per connected component. More...
 
string param_rmgraph = "remove.graph"
 Name of the remove graph, containing all edges that are removed by this clustering algorithm. More...
 
bool param_useWeights = true
 If true, then weights are used for the calculation of the algebraic connectivity. More...
 
unsigned int param_minOpenmp = 256
 The minimum size of a for-loop for openmp to activate (openmp has some initialization costs). More...
 
bool param_useKmereHeuristic = true
 If true, then the kmere-heuristic is used. For ambiguous splits (e.g. all but one entry of the Fiedler vector are below 0), the Fiedler vector is clustered into k groups instead. This will lead to k new groups instead of 2. See the variables kmereHeuristic_* for more information on when the kmere-heuristic will be activated. More...
 
unsigned int param_maxRam_inKB = 16777216
 = 16 GB of memory as default. Decreasing this may lead to more power iterations and thus to an increase in runtime as a trade-off. More...
 
bool param_useLapack = true
 If true, then lapack is used according to param_lapack_power_threshold_d. More...
 
unsigned int critical_min_nodes = 16777216
 Deprecated. More...
 
const unsigned int min_iter = 16
 Minimal number of iterations for power iteration. More...
 
unsigned int param_max_iter = 8192
 Maximal number of iterations for power iteration. More...
 
floattype param_epsilon = 1e-8
 Epsilon for the lapack functions dsyevr/ssyevr as well as the power iteration. Set analogously to http://people.sc.fsu.edu/~jburkardt/c_src/power_method/power_method_prb.c. More...
 
const unsigned int kmereHeuristic_minNodes = 1048576
 = 2^20. The minimum number of nodes of the connected component for the kmere-heuristic. More...
 
const unsigned int kmereHeuristic_protPerSpecies = 1
 The minimum number of proteins per species for the kmere-heuristic. More...
 
const unsigned int kmereHeuristic_minNumberOfGroups = 3
 The minimum number of species for the kmere-heuristic. More...
 
const unsigned int maxUseWeightsNumNodes = 1048576
 = 2^20. The maximum number of nodes for a connected component such that weights are still used for calculations. If the connected component is too large, the unweighted version is used instead to save time. More...
 
floattype param_lapack_power_threshold_d = -1
 The minimum graph density for the power iteration method; lapack's (d|s)syevr is used otherwise. If -1, then a linear function is used instead: if d<10^(-5.2)*n, then lapack is used, otherwise the power iteration. More...
 
unsigned int species_counter = 0
 Number of species (in the input graph) More...
 
unsigned int protein_counter = 0
 Number of proteins total (in the input graph) More...
 
vector< string > species
 Species ID (number) -> Species name. More...
 
vector< protein > graph
 Graph containing all protein data (see the class protein for more information) More...
 
floattype last_stat = 0
 For buffering progress stats. More...
 
unsigned int edges = 0
 Total number of edges. More...
 
vector< shared_ptr< ofstream > > graph_clean
 File handler to store graph data. More...
 
vector< int > reorder_table
 Tells how proteins/species must be sorted. More...
 
unsigned long graph_ram_total_inKB =0
 The internal size of the input graph in KB (this will be set in parse_file() ) More...
 
unsigned int num_cpus =1
 By default only one core is used (change this with -cpus) More...
 

Macro Definition Documentation

◆ floatprecision_H

#define floatprecision_H   1

Typedef Documentation

◆ floattype

using floattype = float