pktools  2.6.4
Processing Kernel for geospatial data
StatFactory.h
1 /**********************************************************************
2 StatFactory.h: class for statistical operations on vectors
3 Copyright (C) 2008-2013 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #ifndef _STATFACTORY_H_
21 #define _STATFACTORY_H_
22 
23 #include <iostream>
24 #include <vector>
25 #include <map>
26 #include <math.h>
27 #include <assert.h>
28 #include <string>
29 #include <sstream>
30 #include <fstream>
31 #include <algorithm>
32 #include <gsl/gsl_fit.h>
33 #include <gsl/gsl_errno.h>
34 #include <gsl/gsl_spline.h>
35 #include <gsl/gsl_rng.h>
36 #include <gsl/gsl_randist.h>
37 #include <gsl/gsl_cdf.h>
38 #include <gsl/gsl_statistics.h>
39 
40 namespace statfactory
41 {
42 
44 
45 public:
46  enum INTERPOLATION_TYPE {undefined=0,linear=1,polynomial=2,cspline=3,cspline_periodic=4,akima=5,akima_periodic=6};
47  //todo: expand with other distributions as in http://www.gnu.org/software/gsl/manual/gsl-ref_toc.html#TOC320
48  enum DISTRIBUTION_TYPE {uniform=1,gaussian=2};
49 
50  StatFactory(void){};
51  virtual ~StatFactory(void){};
52  INTERPOLATION_TYPE getInterpolationType(const std::string interpolationType){
53  std::map<std::string, INTERPOLATION_TYPE> m_interpMap;
54  initMap(m_interpMap);
55  return m_interpMap[interpolationType];
56  };
57  DISTRIBUTION_TYPE getDistributionType(const std::string distributionType){
58  std::map<std::string, DISTRIBUTION_TYPE> m_distMap;
59  initDist(m_distMap);
60  return m_distMap[distributionType];
61  };
62  static void allocAcc(gsl_interp_accel *&acc){
63  acc = gsl_interp_accel_alloc ();
64  };
65 
66  static void getSpline(const std::string type, int size, gsl_spline *& spline){
67  std::map<std::string, INTERPOLATION_TYPE> m_interpMap;
68  initMap(m_interpMap);
69  switch(m_interpMap[type]){
70  case(polynomial):
71  spline=gsl_spline_alloc(gsl_interp_polynomial,size);
72  break;
73  case(cspline):
74  spline=gsl_spline_alloc(gsl_interp_cspline,size);
75  break;
76  case(cspline_periodic):
77  spline=gsl_spline_alloc(gsl_interp_cspline_periodic,size);
78  break;
79  case(akima):
80  spline=gsl_spline_alloc(gsl_interp_akima,size);
81  break;
82  case(akima_periodic):
83  spline=gsl_spline_alloc(gsl_interp_akima_periodic,size);
84  break;
85  case(linear):
86  default:
87  spline=gsl_spline_alloc(gsl_interp_linear,size);
88  break;
89  }
90  assert(spline);
91  };
92  static int initSpline(gsl_spline *spline, const double *x, const double *y, int size){
93  return gsl_spline_init (spline, x, y, size);
94  };
95  static double evalSpline(gsl_spline *spline, double x, gsl_interp_accel *acc){
96  return gsl_spline_eval (spline, x, acc);
97  };
98 
99  static gsl_rng* getRandomGenerator(unsigned long int theSeed){
100  const gsl_rng_type * T;
101  gsl_rng * r;
102 
103  // select random number generator
104  r = gsl_rng_alloc (gsl_rng_mt19937);
105  gsl_rng_set(r,theSeed);
106  return r;
107  }
108  void getNodataValues(std::vector<double>& nodatav) const{nodatav=m_noDataValues;};
109  bool isNoData(double value) const{
110  if(m_noDataValues.empty())
111  return false;
112  else
113  return find(m_noDataValues.begin(),m_noDataValues.end(),value)!=m_noDataValues.end();
114  };
115  unsigned int pushNodDataValue(double noDataValue){
116  if(find(m_noDataValues.begin(),m_noDataValues.end(),noDataValue)==m_noDataValues.end())
117  m_noDataValues.push_back(noDataValue);
118  return m_noDataValues.size();
119  };
120  unsigned int setNoDataValues(std::vector<double> vnodata){
121  m_noDataValues=vnodata;
122  return m_noDataValues.size();
123  };
124  double getRandomValue(const gsl_rng* r, const std::string type, double a=0, double b=1) const{
125  std::map<std::string, DISTRIBUTION_TYPE> m_distMap;
126  initDist(m_distMap);
127  double randValue=0;
128  switch(m_distMap[type]){
129  case(uniform):
130  randValue = a+gsl_rng_uniform(r);
131  break;
132  case(gaussian):
133  default:
134  randValue = a+gsl_ran_gaussian(r, b);
135  break;
136  }
137  return randValue;
138  };
139 
140 
141  template<class T> T mymin(const typename std::vector<T>& v) const;
142  template<class T> T mymax(const typename std::vector<T>& v) const;
143  template<class T> T mymin(const typename std::vector<T>& v, T minConstraint) const;
144  template<class T> T mymax(const typename std::vector<T>& v, T maxConstraint) const;
145 // template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
146  template<class T> typename std::vector<T>::const_iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
147  template<class T> typename std::vector<T>::iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const;
148  template<class T> typename std::vector<T>::const_iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T minConstraint) const;
149  template<class T> typename std::vector<T>::iterator mymin(const typename std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T minConstraint) const;
150  template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
151  template<class T> typename std::vector<T>::iterator mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const;
152  template<class T> typename std::vector<T>::const_iterator mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T maxConstraint) const;
153  template<class T> typename std::vector<T>::iterator mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T maxConstraint) const;
154  template<class T> typename std::vector<T>::const_iterator absmin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
155  template<class T> typename std::vector<T>::const_iterator absmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const;
156 
157  template<class T> void minmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T& theMin, T& theMax) const;
158  template<class T> T sum(const std::vector<T>& v) const;
159  template<class T> double mean(const std::vector<T>& v) const;
160  template<class T> void eraseNoData(std::vector<T>& v) const;
161  template<class T> T median(const std::vector<T>& v) const;
162  template<class T> double var(const std::vector<T>& v) const;
163  template<class T> double moment(const std::vector<T>& v, int n) const;
164  template<class T> double cmoment(const std::vector<T>& v, int n) const;
165  template<class T> double skewness(const std::vector<T>& v) const;
166  template<class T> double kurtosis(const std::vector<T>& v) const;
167  template<class T> void meanVar(const std::vector<T>& v, double& m1, double& v1) const;
168  template<class T1, class T2> void scale2byte(const std::vector<T1>& input, std::vector<T2>& output, unsigned char lbound=0, unsigned char ubound=255) const;
169  template<class T> void distribution(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<double>& output, int nbin, T &minimum, T &maximum, double sigma=0, const std::string &filename="") const;
170  template<class T> void distribution(const std::vector<T>& input, std::vector<double>& output, int nbin, double sigma=0, const std::string &filename="") const{distribution(input,input.begin(),input.end(),output,nbin,0,0,sigma,filename);};
171  template<class T> void distribution2d(const std::vector<T>& inputX, const std::vector<T>& inputY, std::vector< std::vector<double> >& output, int nbin, T& minX, T& maxX, T& minY, T& maxY, double sigma=0, const std::string& filename="") const;
172  template<class T> void cumulative (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<int>& output, int nbin, T &minimum, T &maximum) const;
173  template<class T> void percentiles (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<T>& output, int nbin, T &minimum, T &maximum, const std::string &filename="") const;
174  template<class T> T percentile(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, double percent, T minimum=0, T maximum=0) const;
175  template<class T> void signature(const std::vector<T>& input, double& k, double& alpha, double& beta, double e) const;
176  void signature(double m1, double m2, double& k, double& alpha, double& beta, double e) const;
177  template<class T> void normalize(const std::vector<T>& input, std::vector<double>& output) const;
178  template<class T> void normalize_pct(std::vector<T>& input) const;
179  template<class T> double rmse(const std::vector<T>& x, const std::vector<T>& y) const;
180  template<class T> double correlation(const std::vector<T>& x, const std::vector<T>& y, int delay=0) const;
181  // template<class T> double gsl_correlation(const std::vector<T>& x, const std::vector<T>& y) const;
182  template<class T> double gsl_covariance(const std::vector<T>& x, const std::vector<T>& y) const;
183  template<class T> double cross_correlation(const std::vector<T>& x, const std::vector<T>& y, int maxdelay, std::vector<T>& z) const;
184  template<class T> double linear_regression(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const;
185  template<class T> double linear_regression_err(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const;
186  template<class T> void interpolateNoData(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::string& type, std::vector<T>& output, bool verbose=false) const;
187  template<class T> void interpolateUp(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector<T>& output, bool verbose=false) const;
188  template<class T> void interpolateUp(const std::vector<double>& wavelengthIn, const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector< std::vector<T> >& output, bool verbose=false) const;
189  // template<class T> void interpolateUp(const std::vector< std::vector<T> >& input, std::vector< std::vector<T> >& output, double start, double end, double step, const gsl_interp_type* type);
190  // template<class T> void interpolateUp(const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthIn, std::vector< std::vector<T> >& output, std::vector<double>& wavelengthOut, double start, double end, double step, const gsl_interp_type* type);
191  template<class T> void interpolateUp(const std::vector<T>& input, std::vector<T>& output, int nbin) const;
192  template<class T> void nearUp(const std::vector<T>& input, std::vector<T>& output) const;
193  template<class T> void interpolateUp(double* input, int dim, std::vector<T>& output, int nbin);
194  template<class T> void interpolateDown(const std::vector<T>& input, std::vector<T>& output, int nbin) const;
195  template<class T> void interpolateDown(double* input, int dim, std::vector<T>& output, int nbin);
196 
197 private:
198  static void initMap(std::map<std::string, INTERPOLATION_TYPE>& m_interpMap){
199  //initialize selMap
200  m_interpMap["linear"]=linear;
201  m_interpMap["polynomial"]=polynomial;
202  m_interpMap["cspline"]=cspline;
203  m_interpMap["cspline_periodic"]=cspline_periodic;
204  m_interpMap["akima"]=akima;
205  m_interpMap["akima_periodic"]=akima_periodic;
206  }
207  static void initDist(std::map<std::string, DISTRIBUTION_TYPE>& m_distMap){
208  //initialize distMap
209  m_distMap["gaussian"]=gaussian;
210  m_distMap["uniform"]=uniform;
211  }
212  std::vector<double> m_noDataValues;
213 };
214 
215 
216 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
217 {
218  bool isValid=false;
219  typename std::vector<T>::const_iterator tmpIt;
220  for(typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
221  if(!isNoData(*it)){
222  if(isValid){
223  if(*tmpIt>*it)
224  tmpIt=it;
225  }
226  else{
227  tmpIt=it;
228  isValid=true;
229  }
230  }
231  }
232  if(isValid)
233  return tmpIt;
234  else if(m_noDataValues.size())
235  return m_noDataValues[0];
236  else{
237  std::string errorString="Error: no valid data found";
238  throw(errorString);
239  }
240 }
241 
242 template<class T> inline typename std::vector<T>::iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const
243 {
244  bool isValid=false;
245  typename std::vector<T>::iterator tmpIt;
246  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
247  if(!isNoData(*it)){
248  if(isValid){
249  if(*tmpIt>*it)
250  tmpIt=it;
251  }
252  else{
253  tmpIt=it;
254  isValid=true;
255  }
256  }
257  }
258  if(isValid)
259  return tmpIt;
260  else if(m_noDataValues.size())
261  return m_noDataValues[0];
262  else{
263  std::string errorString="Error: no valid data found";
264  throw(errorString);
265  }
266 }
267 
268 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T minConstraint) const
269 {
270  bool isValid=false;
271  typename std::vector<T>::const_iterator tmpIt;
272  T minValue=minConstraint;
273  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
274  if(isNoData(*it))
275  continue;
276  if(isValid){
277  if((minConstraint<=*it)&&(*it<=minValue)){
278  tmpIt=it;
279  minValue=*it;
280  }
281  }
282  else{
283  isValid=true;
284  tmpIt=it;
285  minValue=*it;
286  }
287  }
288  if(isValid)
289  return tmpIt;
290  else if(m_noDataValues.size())
291  return m_noDataValues[0];
292  else{
293  std::string errorString="Error: no valid data found";
294  throw(errorString);
295  }
296 }
297 
298 template<class T> inline typename std::vector<T>::iterator StatFactory::mymin(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T minConstraint) const
299 {
300  bool isValid=false;
301  typename std::vector<T>::iterator tmpIt;
302  T minValue=minConstraint;
303  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
304  if(isNoData(*it))
305  continue;
306  if(isValid){
307  if((minConstraint<=*it)&&(*it<=minValue)){
308  tmpIt=it;
309  minValue=*it;
310  }
311  }
312  else{
313  isValid=true;
314  tmpIt=it;
315  minValue=*it;
316  }
317  }
318  if(isValid)
319  return tmpIt;
320  else if(m_noDataValues.size())
321  return m_noDataValues[0];
322  else{
323  std::string errorString="Error: no valid data found";
324  throw(errorString);
325  }
326 }
327 
328 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
329 {
330  bool isValid=false;
331  typename std::vector<T>::const_iterator tmpIt;
332  for (typename std::vector<T>::iterator it = begin; it!=end; ++it){
333  if(isNoData(*it))
334  continue;
335  if(isValid){
336  if(*tmpIt<*it)
337  tmpIt=it;
338  }
339  else{
340  tmpIt=it;
341  isValid=true;
342  }
343  }
344  if(isValid)
345  return tmpIt;
346  else if(m_noDataValues.size())
347  return m_noDataValues[0];
348  else{
349  std::string errorString="Error: no valid data found";
350  throw(errorString);
351  }
352 }
353 
354 template<class T> inline typename std::vector<T>::iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end) const
355 {
356  bool isValid=false;
357  typename std::vector<T>::iterator tmpIt;
358  for (typename std::vector<T>::iterator it = begin; it!=end; ++it){
359  if(isNoData(*it))
360  continue;
361  if(isValid){
362  if(*tmpIt<*it)
363  tmpIt=it;
364  }
365  else{
366  tmpIt=it;
367  isValid=true;
368  }
369  }
370  if(isValid)
371  return tmpIt;
372  else
373  return end;
374 }
375 
376 template<class T> inline typename std::vector<T>::const_iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T maxConstraint) const
377 {
378  bool isValid=false;
379  typename std::vector<T>::const_iterator tmpIt;
380  T maxValue=maxConstraint;
381  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
382  if(isNoData(*it))
383  continue;
384  if(isValid){
385  if((maxConstraint>=*it)&&(*it>=maxValue)){
386  tmpIt=it;
387  maxValue=*it;
388  }
389  }
390  else{
391  isValid=true;
392  tmpIt=it;
393  maxValue=*it;
394  }
395  }
396  if(isValid)
397  return tmpIt;
398  else
399  return end;
400 }
401 
402 template<class T> inline typename std::vector<T>::iterator StatFactory::mymax(const std::vector<T>& v, typename std::vector<T>::iterator begin, typename std::vector<T>::iterator end, T maxConstraint) const
403 {
404  bool isValid=false;
405  typename std::vector<T>::iterator tmpIt=v.end();
406  T maxValue=maxConstraint;
407  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
408  if(isNoData(*it))
409  continue;
410  if(isValid){
411  if((maxConstraint>=*it)&&(*it>=maxValue)){
412  tmpIt=it;
413  maxValue=*it;
414  }
415  }
416  else{
417  isValid=true;
418  tmpIt=it;
419  maxValue=*it;
420  }
421  }
422  if(isValid)
423  return tmpIt;
424  else
425  return end;
426 }
427 
428 template<class T> inline T StatFactory::mymin(const std::vector<T>& v) const
429 {
430  bool isValid=false;
431  if(v.empty()){
432  std::string errorString="Error: vector is empty";
433  throw(errorString);
434  }
435  T minValue;
436  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
437  if(isNoData(*it))
438  continue;
439  if(isValid){
440  if(minValue>*it)
441  minValue=*it;
442  }
443  else{
444  minValue=*it;
445  isValid=true;
446  }
447  }
448  if(isValid)
449  return minValue;
450  else if(m_noDataValues.size())
451  return m_noDataValues[0];
452  else{
453  std::string errorString="Error: no valid data found";
454  throw(errorString);
455  }
456 }
457 
458  template<class T> inline T StatFactory::mymin(const std::vector<T>& v, T minConstraint) const
459 {
460  bool isValid=false;
461  T minValue=minConstraint;
462  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
463  if(isNoData(*it))
464  continue;
465  isValid=true;
466  if((minConstraint<=*it)&&(*it<=minValue))
467  minValue=*it;
468  }
469  if(isValid)
470  return minValue;
471  else if(m_noDataValues.size())
472  return m_noDataValues[0];
473  else{
474  std::string errorString="Error: no valid data found";
475  throw(errorString);
476  }
477 }
478 
479 template<class T> inline T StatFactory::mymax(const std::vector<T>& v) const
480 {
481  bool isValid=false;
482  if(v.empty()){
483  std::string errorString="Error: vector is empty";
484  throw(errorString);
485  }
486  T maxValue;
487  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
488  if(isNoData(*it))
489  continue;
490  if(isValid){
491  if(maxValue<*it)
492  maxValue=*it;
493  }
494  else{
495  maxValue=*it;
496  isValid=true;
497  }
498  }
499  if(isValid)
500  return maxValue;
501  else if(m_noDataValues.size())
502  return m_noDataValues[0];
503  else{
504  std::string errorString="Error: no valid data found";
505  throw(errorString);
506  }
507 }
508 
509 template<class T> inline T StatFactory::mymax(const std::vector<T>& v, T maxConstraint) const
510 {
511  bool isValid=false;
512  T maxValue=maxConstraint;
513  for (typename std::vector<T>::const_iterator it = v.begin(); it!=v.end(); ++it){
514  if(isNoData(*it))
515  continue;
516  if((maxValue<=*it)&&(*it<=maxConstraint))
517  maxValue=*it;
518  }
519  if(isValid)
520  return maxValue;
521  else if(m_noDataValues.size())
522  return m_noDataValues[0];
523  else{
524  std::string errorString="Error: no valid data found";
525  throw(errorString);
526  }
527 }
528 
529 template<class T> inline typename std::vector<T>::const_iterator StatFactory::absmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
530 {
531  bool isValid=false;
532  typename std::vector<T>::const_iterator tmpIt;
533  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
534  if(isNoData(*it))
535  continue;
536  if(isValid){
537  if(abs(*tmpIt)<abs(*it))
538  tmpIt=it;
539  }
540  else{
541  tmpIt=it;
542  isValid=true;
543  }
544  }
545  if(isValid)
546  return tmpIt;
547  else
548  return end;
549 }
550 
551 template<class T> inline typename std::vector<T>::const_iterator StatFactory::absmin(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end) const
552 {
553  bool isValid=false;
554  typename std::vector<T>::const_iterator tmpIt;
555  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
556  if(isNoData(*it))
557  continue;
558  if(isValid){
559  if(abs(*tmpIt)>abs(*it))
560  tmpIt=it;
561  }
562  else{
563  tmpIt=it;
564  isValid=true;
565  }
566  }
567  if(isValid)
568  return tmpIt;
569  else
570  return end;
571 }
572 
573 template<class T> inline void StatFactory::minmax(const std::vector<T>& v, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, T& theMin, T& theMax) const
574 {
575  bool isValid=false;
576  for (typename std::vector<T>::const_iterator it = begin; it!=end; ++it){
577  if(isNoData(*it))
578  continue;
579  if(isValid){
580  if(theMin>*it)
581  theMin=*it;
582  if(theMax<*it)
583  theMax=*it;
584  }
585  else{
586  theMin=*it;
587  theMax=*it;
588  isValid=true;
589  }
590  }
591  if(!isValid){
592  if(m_noDataValues.size()){
593  theMin=m_noDataValues[0];
594  theMax=m_noDataValues[0];
595  }
596  else{
597  std::string errorString="Error: no valid data found";
598  throw(errorString);
599  }
600  }
601 }
602 
603 template<class T> inline T StatFactory::sum(const std::vector<T>& v) const
604 {
605  bool isValid=false;
606  typename std::vector<T>::const_iterator it;
607  T tmpSum=0;
608  for (it = v.begin(); it!= v.end(); ++it){
609  if(isNoData(*it))
610  continue;
611  isValid=true;
612  tmpSum+=*it;
613  }
614  if(isValid)
615  return tmpSum;
616  else if(m_noDataValues.size())
617  return m_noDataValues[0];
618  else{
619  std::string errorString="Error: no valid data found";
620  throw(errorString);
621  }
622 }
623 
624 template<class T> inline double StatFactory::mean(const std::vector<T>& v) const
625 {
626  typename std::vector<T>::const_iterator it;
627  T tmpSum=0;
628  unsigned int validSize=0;
629  for (it = v.begin(); it!= v.end(); ++it){
630  if(isNoData(*it))
631  continue;
632  ++validSize;
633  tmpSum+=*it;
634  }
635  if(validSize)
636  return static_cast<double>(tmpSum)/validSize;
637  else if(m_noDataValues.size())
638  return m_noDataValues[0];
639  else{
640  std::string errorString="Error: no valid data found";
641  throw(errorString);
642  }
643 }
644 
645 template<class T> inline void StatFactory::eraseNoData(std::vector<T>& v) const
646 {
647  if(m_noDataValues.size()){
648  typename std::vector<T>::iterator it=v.begin();
649  while(it!=v.end()){
650  if(isNoData(*it))
651  v.erase(it);
652  else
653  ++it;
654  }
655  }
656 }
657 
658 template<class T> T StatFactory::median(const std::vector<T>& v) const
659 {
660  std::vector<T> tmpV=v;
661  eraseNoData(tmpV);
662  if(tmpV.size()){
663  sort(tmpV.begin(),tmpV.end());
664  if(tmpV.size()%2)
665  return tmpV[tmpV.size()/2];
666  else
667  return 0.5*(tmpV[tmpV.size()/2-1]+tmpV[tmpV.size()/2]);
668  }
669  else if(m_noDataValues.size())
670  return m_noDataValues[0];
671  else{
672  std::string errorString="Error: no valid data found";
673  throw(errorString);
674  }
675 }
676 
677 template<class T> double StatFactory::var(const std::vector<T>& v) const
678 {
679  typename std::vector<T>::const_iterator it;
680  unsigned int validSize=0;
681  double m1=0;
682  double m2=0;
683  for (it = v.begin(); it!= v.end(); ++it){
684  if(isNoData(*it))
685  continue;
686  m1+=*it;
687  m2+=(*it)*(*it);
688  ++validSize;
689  }
690  if(validSize){
691  m2/=validSize;
692  m1/=validSize;
693  return m2-m1*m1;
694  }
695  else if(m_noDataValues.size())
696  return m_noDataValues[0];
697  else{
698  std::string errorString="Error: no valid data found";
699  throw(errorString);
700  }
701 }
702 
703 template<class T> double StatFactory::moment(const std::vector<T>& v, int n) const
704 {
705  unsigned int validSize=0;
706  typename std::vector<T>::const_iterator it;
707  double m=0;
708 // double m1=mean(v);
709  for(it = v.begin(); it!= v.end(); ++it){
710  if(isNoData(*it))
711  continue;
712  m+=pow((*it),n);
713  ++validSize;
714  }
715  if(validSize)
716  return m/validSize;
717  else if(m_noDataValues.size())
718  return m_noDataValues[0];
719  else{
720  std::string errorString="Error: no valid data found";
721  throw(errorString);
722  }
723 }
724 
725  //central moment
726 template<class T> double StatFactory::cmoment(const std::vector<T>& v, int n) const
727 {
728  unsigned int validSize=0;
729  typename std::vector<T>::const_iterator it;
730  double m=0;
731  double m1=mean(v);
732  for(it = v.begin(); it!= v.end(); ++it){
733  if(isNoData(*it))
734  continue;
735  m+=pow((*it-m1),n);
736  ++validSize;
737  }
738  if(validSize)
739  return m/validSize;
740  else if(m_noDataValues.size())
741  return m_noDataValues[0];
742  else{
743  std::string errorString="Error: no valid data found";
744  throw(errorString);
745  }
746 }
747 
748 template<class T> double StatFactory::skewness(const std::vector<T>& v) const
749 {
750  //todo: what if nodata value?
751  return cmoment(v,3)/pow(var(v),1.5);
752 }
753 
754 template<class T> double StatFactory::kurtosis(const std::vector<T>& v) const
755 {
756  //todo: what if nodata value?
757  double m2=cmoment(v,2);
758  double m4=cmoment(v,4);
759  return m4/m2/m2-3.0;
760 }
761 
762 template<class T> void StatFactory::meanVar(const std::vector<T>& v, double& m1, double& v1) const
763 {
764  typename std::vector<T>::const_iterator it;
765  unsigned int validSize=0;
766  m1=0;
767  v1=0;
768  double m2=0;
769  for (it = v.begin(); it!= v.end(); ++it){
770  if(isNoData(*it))
771  continue;
772  m1+=*it;
773  m2+=(*it)*(*it);
774  ++validSize;
775  }
776  if(validSize){
777  m2/=validSize;
778  m1/=validSize;
779  v1=m2-m1*m1;
780  }
781  else if(m_noDataValues.size()){
782  m1=m_noDataValues[0];
783  v1=m_noDataValues[0];
784  }
785  else{
786  std::string errorString="Error: no valid data found";
787  throw(errorString);
788  }
789 }
790 
791 template<class T1, class T2> void StatFactory::scale2byte(const std::vector<T1>& input, std::vector<T2>& output, unsigned char lbound, unsigned char ubound) const
792 {
793  output.resize(input.size());
794  T1 minimum=mymin(input);
795  T1 maximum=mymax(input);
796  if(minimum>=maximum){
797  std::string errorString="Error: no valid data found";
798  throw(errorString);
799  }
800  double scale=(ubound-lbound)/(maximum-minimum);
801  //todo: what if nodata value?
802  for (int i=0;i<input.size();++i){
803  output[i]=scale*(input[i]-(minimum))+lbound;
804  }
805 }
806 
807 template<class T> void StatFactory::distribution(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<double>& output, int nbin, T &minimum, T &maximum, double sigma, const std::string &filename) const
808 {
809  double minValue=0;
810  double maxValue=0;
811  minmax(input,begin,end,minValue,maxValue);
812  if(minimum<maximum&&minimum>minValue)
813  minValue=minimum;
814  if(minimum<maximum&&maximum<maxValue)
815  maxValue=maximum;
816 
817  //todo: check...
818  minimum=minValue;
819  maximum=maxValue;
820 
821  if(maximum<=minimum){
822  std::ostringstream s;
823  s<<"Error: could not calculate distribution (min>=max)";
824  throw(s.str());
825  }
826  if(!nbin){
827  std::string errorString="Error: nbin not defined";
828  throw(errorString);
829  }
830  if(!input.size()){
831  std::string errorString="Error: no valid data found";
832  throw(errorString);
833  }
834  if(output.size()!=nbin){
835  output.resize(nbin);
836  for(int i=0;i<nbin;output[i++]=0);
837  }
838  bool isValid=false;
839  typename std::vector<T>::const_iterator it;
840  for(it=begin;it!=end;++it){
841  if(*it<minimum)
842  continue;
843  if(*it>maximum)
844  continue;
845  if(isNoData(*it))
846  continue;
847  isValid=true;
848  if(sigma>0){
849  // minimum-=2*sigma;
850  // maximum+=2*sigma;
851  //create kde for Gaussian basis function
852  //todo: speed up by calculating first and last bin with non-zero contriubtion...
853  //todo: calculate real surface below pdf by using gsl_cdf_gaussian_P(x-mean+binsize,sigma)-gsl_cdf_gaussian_P(x-mean,sigma)
854  for(int ibin=0;ibin<nbin;++ibin){
855  double icenter=minimum+static_cast<double>(maximum-minimum)*(ibin+0.5)/nbin;
856  double thePdf=gsl_ran_gaussian_pdf(*it-icenter, sigma);
857  output[ibin]+=thePdf;
858  }
859  }
860  else{
861  int theBin=0;
862  if(*it==maximum)
863  theBin=nbin-1;
864  else if(*it>minimum && *it<maximum)
865  theBin=static_cast<int>(static_cast<double>((nbin-1)*(*it)-minimum)/(maximum-minimum));
866  ++output[theBin];
867  // if(*it==maximum)
868  // ++output[nbin-1];
869  // else if(*it>=minimum && *it<maximum)
870  // ++output[static_cast<int>(static_cast<double>((*it)-minimum)/(maximum-minimum)*nbin)];
871  }
872  }
873  if(!isValid){
874  std::string errorString="Error: no valid data found";
875  throw(errorString);
876  }
877  else if(!filename.empty()){
878  std::ofstream outputfile;
879  outputfile.open(filename.c_str());
880  if(!outputfile){
881  std::ostringstream s;
882  s<<"Error opening distribution file , " << filename;
883  throw(s.str());
884  }
885  for(int ibin=0;ibin<nbin;++ibin)
886  outputfile << minimum+static_cast<double>(maximum-minimum)*(ibin+0.5)/nbin << " " << static_cast<double>(output[ibin])/input.size() << std::endl;
887  outputfile.close();
888  }
889 }
890 
891 template<class T> void StatFactory::distribution2d(const std::vector<T>& inputX, const std::vector<T>& inputY, std::vector< std::vector<double> >& output, int nbin, T& minX, T& maxX, T& minY, T& maxY, double sigma, const std::string& filename) const
892 {
893  if(inputX.empty()){
894  std::ostringstream s;
895  s<<"Error: inputX is empty";
896  throw(s.str());
897  }
898  if(inputX.size()!=inputY.size()){
899  std::ostringstream s;
900  s<<"Error: inputX is empty";
901  throw(s.str());
902  }
903  unsigned long int npoint=inputX.size();
904  if(maxX<=minX)
905  minmax(inputX,inputX.begin(),inputX.end(),minX,maxX);
906  if(maxX<=minX){
907  std::ostringstream s;
908  s<<"Error: could not calculate distribution (minX>=maxX)";
909  throw(s.str());
910  }
911  if(maxY<=minY)
912  minmax(inputY,inputY.begin(),inputY.end(),minY,maxY);
913  if(maxY<=minY){
914  std::ostringstream s;
915  s<<"Error: could not calculate distribution (minY>=maxY)";
916  throw(s.str());
917  }
918  if(nbin<=1){
919  std::ostringstream s;
920  s<<"Error: nbin must be larger than 1";
921  throw(s.str());
922  }
923  output.resize(nbin);
924  for(int i=0;i<nbin;++i){
925  output[i].resize(nbin);
926  for(int j=0;j<nbin;++j)
927  output[i][j]=0;
928  }
929  int binX=0;
930  int binY=0;
931  for(unsigned long int ipoint=0;ipoint<npoint;++ipoint){
932  if(inputX[ipoint]==maxX)
933  binX=nbin-1;
934  else
935  binX=static_cast<int>(static_cast<double>(inputX[ipoint]-minX)/(maxX-minX)*nbin);
936  if(inputY[ipoint]==maxY)
937  binY=nbin-1;
938  else
939  binY=static_cast<int>(static_cast<double>(inputY[ipoint]-minY)/(maxY-minY)*nbin);
940  if(binX<0){
941  std::ostringstream s;
942  s<<"Error: binX is smaller than 0";
943  throw(s.str());
944  }
945  if(output.size()<=binX){
946  std::ostringstream s;
947  s<<"Error: output size must be larger than binX";
948  throw(s.str());
949  }
950  if(binY<0){
951  std::ostringstream s;
952  s<<"Error: binY is smaller than 0";
953  throw(s.str());
954  }
955  if(output.size()<=binY){
956  std::ostringstream s;
957  s<<"Error: output size must be larger than binY";
958  throw(s.str());
959  }
960  if(sigma>0){
961  // minX-=2*sigma;
962  // maxX+=2*sigma;
963  // minY-=2*sigma;
964  // maxY+=2*sigma;
965  //create kde for Gaussian basis function
966  //todo: speed up by calculating first and last bin with non-zero contriubtion...
967  for(int ibinX=0;ibinX<nbin;++ibinX){
968  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
969  double pdfX=gsl_ran_gaussian_pdf(inputX[ipoint]-centerX, sigma);
970  for(int ibinY=0;ibinY<nbin;++ibinY){
971  //calculate \integral_ibinX^(ibinX+1)
972  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
973  double pdfY=gsl_ran_gaussian_pdf(inputY[ipoint]-centerY, sigma);
974  output[ibinX][binY]+=pdfX*pdfY;
975  }
976  }
977  }
978  else
979  ++output[binX][binY];
980  }
981  if(!filename.empty()){
982  std::ofstream outputfile;
983  outputfile.open(filename.c_str());
984  if(!outputfile){
985  std::ostringstream s;
986  s<<"Error opening distribution file , " << filename;
987  throw(s.str());
988  }
989  for(int binX=0;binX<nbin;++binX){
990  outputfile << std::endl;
991  for(int binY=0;binY<nbin;++binY){
992  double binValueX=0;
993  if(nbin==maxX-minX+1)
994  binValueX=minX+binX;
995  else
996  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
997  double binValueY=0;
998  if(nbin==maxY-minY+1)
999  binValueY=minY+binY;
1000  else
1001  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
1002  double value=0;
1003  value=static_cast<double>(output[binX][binY])/npoint;
1004  outputfile << binValueX << " " << binValueY << " " << value << std::endl;
1005  /* double value=static_cast<double>(output[binX][binY])/npoint; */
1006  /* outputfile << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl; */
1007  }
1008  }
1009  outputfile.close();
1010  }
1011 }
1012 
1013 //todo: what with nodata values?
1014 template<class T> void StatFactory::percentiles (const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, std::vector<T>& output, int nbin, T &minimum, T &maximum, const std::string &filename) const
1015 {
1016  if(maximum<=minimum)
1017  minmax(input,begin,end,minimum,maximum);
1018  if(maximum<=minimum){
1019  std::ostringstream s;
1020  s<<"Error: maximum must be at least minimum";
1021  throw(s.str());
1022  }
1023  if(nbin<=1){
1024  std::ostringstream s;
1025  s<<"Error: nbin must be larger than 1";
1026  throw(s.str());
1027  }
1028  if(input.empty()){
1029  std::ostringstream s;
1030  s<<"Error: input is empty";
1031  throw(s.str());
1032  }
1033  output.resize(nbin);
1034  std::vector<T> inputSort;
1035  inputSort.assign(begin,end);
1036  typename std::vector<T>::iterator vit=inputSort.begin();
1037  while(vit!=inputSort.end()){
1038  if(*vit<minimum||*vit>maximum)
1039  inputSort.erase(vit);
1040  else
1041  ++vit;
1042  }
1043  std::sort(inputSort.begin(),inputSort.end());
1044  vit=inputSort.begin();
1045  std::vector<T> inputBin;
1046  for(int ibin=0;ibin<nbin;++ibin){
1047  inputBin.clear();
1048  while(inputBin.size()<inputSort.size()/nbin&&vit!=inputSort.end()){
1049  inputBin.push_back(*vit);
1050  ++vit;
1051  }
1052  if(inputBin.size()){
1053  output[ibin]=mymax(inputBin);
1054  }
1055  }
1056  if(!filename.empty()){
1057  std::ofstream outputfile;
1058  outputfile.open(filename.c_str());
1059  if(!outputfile){
1060  std::ostringstream s;
1061  s<<"error opening distribution file , " << filename;
1062  throw(s.str());
1063  }
1064  for(int ibin=0;ibin<nbin;++ibin)
1065  outputfile << ibin*100.0/nbin << " " << static_cast<double>(output[ibin])/input.size() << std::endl;
1066  outputfile.close();
1067  }
1068 }
1069 
1070 //todo: what with nodata values?
1071 template<class T> T StatFactory::percentile(const std::vector<T>& input, typename std::vector<T>::const_iterator begin, typename std::vector<T>::const_iterator end, double percent, T minimum, T maximum) const
1072 {
1073  if(input.empty()){
1074  std::ostringstream s;
1075  s<<"Error: input is empty";
1076  throw(s.str());
1077  }
1078  std::vector<T> inputSort;
1079  inputSort.assign(begin,end);
1080  typename std::vector<T>::iterator vit=inputSort.begin();
1081  while(vit!=inputSort.end()){
1082  if(maximum>minimum){
1083  if(*vit<minimum||*vit>maximum)
1084  inputSort.erase(vit);
1085  }
1086  else
1087  ++vit;
1088  }
1089  std::sort(inputSort.begin(),inputSort.end());
1090  return gsl_stats_quantile_from_sorted_data(&(inputSort[0]),1,inputSort.size(),percent/100.0);
1091 }
1092 
1093 template<class T> void StatFactory::signature(const std::vector<T>& input, double&k, double& alpha, double& beta, double e) const
1094 {
1095  double m1=moment(input,1);
1096  double m2=moment(input,2);
1097  signature(m1,m2,k,alpha,beta,e);
1098 }
1099 
1100 //todo: what with nodata values?
1101 template<class T> void StatFactory::normalize(const std::vector<T>& input, std::vector<double>& output) const{
1102  double total=sum(input);
1103  if(total){
1104  output.resize(input.size());
1105  for(int index=0;index<input.size();++index)
1106  output[index]=input[index]/total;
1107  }
1108  else
1109  output=input;
1110 }
1111 
1112 //todo: what with nodata values?
1113 template<class T> void StatFactory::normalize_pct(std::vector<T>& input) const{
1114  double total=sum(input);
1115  if(total){
1116  typename std::vector<T>::iterator it;
1117  for(it=input.begin();it!=input.end();++it)
1118  *it=100.0*(*it)/total;
1119  }
1120 }
1121 
1122 template<class T> double StatFactory::rmse(const std::vector<T>& x, const std::vector<T>& y) const{
1123  if(x.size()!=y.size()){
1124  std::ostringstream s;
1125  s<<"Error: x and y not equal in size";
1126  throw(s.str());
1127  }
1128  if(x.empty()){
1129  std::ostringstream s;
1130  s<<"Error: x is empty";
1131  throw(s.str());
1132  }
1133  double mse=0;
1134  for(int isample=0;isample<x.size();++isample){
1135  if(isNoData(x[isample])||isNoData(y[isample]))
1136  continue;
1137  double e=x[isample]-y[isample];
1138  mse+=e*e/x.size();
1139  }
1140  return sqrt(mse);
1141 }
1142 
1143 // template<class T> double StatFactory::gsl_correlation(const std::vector<T>& x, const std::vector<T>& y) const{
1144 // return(gsl_stats_correlation(&(x[0]),1,&(y[0]),1,x.size()));
1145 // }
1146 
1147  template<class T> double StatFactory::gsl_covariance(const std::vector<T>& x, const std::vector<T>& y) const{
1148  return(gsl_stats_covariance(&(x[0]),1,&(y[0]),1,x.size()));
1149  }
1150 
1151 
1152 template<class T> double StatFactory::correlation(const std::vector<T>& x, const std::vector<T>& y, int delay) const{
1153  double meanX=0;
1154  double meanY=0;
1155  double varX=0;
1156  double varY=0;
1157  double sXY=0;
1158  meanVar(x,meanX,varX);
1159  meanVar(y,meanY,varY);
1160  double denom = sqrt(varX*varY);
1161  bool isValid=false;
1162  if(denom){
1163  //Calculate the correlation series
1164  sXY = 0;
1165  for (int i=0;i<x.size();++i) {
1166  int j = i + delay;
1167  if (j < 0 || j >= y.size())
1168  continue;
1169  else if(isNoData(x[i])||isNoData(y[j]))
1170  continue;
1171  else{
1172  isValid=true;
1173  if(i<0){
1174  std::ostringstream s;
1175  s<<"Error: i must be positive";
1176  throw(s.str());
1177  }
1178  if(i>=x.size()){
1179  std::ostringstream s;
1180  s<<"Error: i must be smaller than x.size()";
1181  throw(s.str());
1182  }
1183  if(j<0){
1184  std::ostringstream s;
1185  s<<"Error: j must be positive";
1186  throw(s.str());
1187  }
1188  if(j>=y.size()){
1189  std::ostringstream s;
1190  s<<"Error: j must be smaller than y.size()";
1191  throw(s.str());
1192  }
1193  sXY += (x[i] - meanX) * (y[j] - meanY);
1194  }
1195  }
1196  if(isValid){
1197  double minSize=(x.size()<y.size())?x.size():y.size();
1198  return(sXY / denom / (minSize-1));
1199  }
1200  else if(m_noDataValues.size())
1201  return m_noDataValues[0];
1202  else{
1203  std::string errorString="Error: no valid data found";
1204  throw(errorString);
1205  }
1206  }
1207  else
1208  return 0;
1209 }
1210 
1211 //todo: what if no valid data?
1212 template<class T> double StatFactory::cross_correlation(const std::vector<T>& x, const std::vector<T>& y, int maxdelay, std::vector<T>& z) const{
1213  z.clear();
1214  double sumCorrelation=0;
1215  for (int delay=-maxdelay;delay<maxdelay;delay++) {
1216  z.push_back(correlation(x,y,delay));
1217  sumCorrelation+=z.back();
1218  }
1219  return sumCorrelation;
1220 }
1221 
1222 //todo: nodata?
1223 template<class T> double StatFactory::linear_regression(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const{
1224  if(x.size()!=y.size()){
1225  std::ostringstream s;
1226  s<<"Error: x and y not equal in size";
1227  throw(s.str());
1228  }
1229  if(x.empty()){
1230  std::ostringstream s;
1231  s<<"Error: x is empty";
1232  throw(s.str());
1233  }
1234  double cov00;
1235  double cov01;
1236  double cov11;
1237  double sumsq;
1238  gsl_fit_linear(&(x[0]),1,&(y[0]),1,x.size(),&c0,&c1,&cov00,&cov01,&cov11,&sumsq);
1239  return (1-sumsq/var(y)/(y.size()-1));
1240 }
1241 
1242 //todo: nodata?
1243 template<class T> double StatFactory::linear_regression_err(const std::vector<T>& x, const std::vector<T>& y, double &c0, double &c1) const{
1244  if(x.size()!=y.size()){
1245  std::ostringstream s;
1246  s<<"Error: x and y not equal in size";
1247  throw(s.str());
1248  }
1249  if(x.empty()){
1250  std::ostringstream s;
1251  s<<"Error: x is empty";
1252  throw(s.str());
1253  }
1254  double cov00;
1255  double cov01;
1256  double cov11;
1257  double sumsq;
1258  gsl_fit_linear(&(x[0]),1,&(y[0]),1,x.size(),&c0,&c1,&cov00,&cov01,&cov11,&sumsq);
1259  return sqrt((sumsq)/(y.size()));
1260 }
1261 
1262 //alternatively: use GNU scientific library:
1263 // gsl_stats_correlation (const double data1[], const size_t stride1, const double data2[], const size_t stride2, const size_t n)
1264 
1265 template<class T> void StatFactory::interpolateNoData(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::string& type, std::vector<T>& output, bool verbose) const{
1266  if(wavelengthIn.empty()){
1267  std::ostringstream s;
1268  s<<"Error: wavelengthIn is empty";
1269  throw(s.str());
1270  }
1271  std::vector<double> wavelengthOut=wavelengthIn;
1272  std::vector<T> validIn=input;
1273  if(input.size()!=wavelengthIn.size()){
1274  std::ostringstream s;
1275  s<<"Error: x and y not equal in size";
1276  throw(s.str());
1277  }
1278  int nband=wavelengthIn.size();
1279  output.clear();
1280  //remove nodata from input and corresponding wavelengthIn
1281  if(m_noDataValues.size()){
1282  typename std::vector<T>::iterator itValue=validIn.begin();
1283  typename std::vector<T>::iterator itWavelength=wavelengthOut.begin();
1284  while(itValue!=validIn.end()&&itWavelength!=wavelengthOut.end()){
1285  if(isNoData(*itValue)){
1286  validIn.erase(itValue);
1287  wavelengthOut.erase(itWavelength);
1288  }
1289  else{
1290  ++itValue;
1291  ++itWavelength;
1292  }
1293  }
1294  if(validIn.size()>1){
1295  try{
1296  interpolateUp(wavelengthOut, validIn, wavelengthIn, type, output, verbose);
1297  }
1298  catch(...){
1299  output=input;
1300  }
1301  }
1302  else//we can not interpolate if no valid data
1303  output=input;
1304  }
1305  else//no nodata values to interpolate
1306  output=input;
1307 }
1308 
1309 template<class T> void StatFactory::interpolateUp(const std::vector<double>& wavelengthIn, const std::vector<T>& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector<T>& output, bool verbose) const{
1310  if(wavelengthIn.empty()){
1311  std::ostringstream s;
1312  s<<"Error: wavelengthIn is empty";
1313  throw(s.str());
1314  }
1315  if(input.size()!=wavelengthIn.size()){
1316  std::ostringstream s;
1317  s<<"Error: input and wavelengthIn not equal in size";
1318  throw(s.str());
1319  }
1320  if(wavelengthOut.empty()){
1321  std::ostringstream s;
1322  s<<"Error: wavelengthOut is empty";
1323  throw(s.str());
1324  }
1325  int nband=wavelengthIn.size();
1326  output.clear();
1327  gsl_interp_accel *acc;
1328  allocAcc(acc);
1329  gsl_spline *spline;
1330  getSpline(type,nband,spline);
1331  assert(spline);
1332  assert(&(wavelengthIn[0]));
1333  assert(&(input[0]));
1334  int status=initSpline(spline,&(wavelengthIn[0]),&(input[0]),nband);
1335  if(status){
1336  std::string errorString="Could not initialize spline";
1337  throw(errorString);
1338  }
1339  for(int index=0;index<wavelengthOut.size();++index){
1340  if(wavelengthOut[index]<*wavelengthIn.begin()){
1341  output.push_back(*(input.begin()));
1342  continue;
1343  }
1344  else if(wavelengthOut[index]>wavelengthIn.back()){
1345  output.push_back(input.back());
1346  continue;
1347  }
1348  double dout=evalSpline(spline,wavelengthOut[index],acc);
1349  output.push_back(dout);
1350  }
1351  gsl_spline_free(spline);
1352  gsl_interp_accel_free(acc);
1353 }
1354 
1355 // template<class T> void StatFactory::interpolateUp(const std::vector<double>& wavelengthIn, const std::vector< std::vector<T> >& input, const std::vector<double>& wavelengthOut, const std::string& type, std::vector< std::vector<T> >& output, bool verbose){
1356 // assert(wavelengthIn.size());
1357 // assert(wavelengthOut.size());
1358 // int nsample=input.size();
1359 // int nband=wavelengthIn.size();
1360 // output.clear();
1361 // output.resize(nsample);
1362 // gsl_interp_accel *acc;
1363 // allocAcc(acc);
1364 // gsl_spline *spline;
1365 // getSpline(type,nband,spline);
1366 // for(int isample=0;isample<nsample;++isample){
1367 // assert(input[isample].size()==wavelengthIn.size());
1368 // initSpline(spline,&(wavelengthIn[0]),&(input[isample][0]),nband);
1369 // for(int index=0;index<wavelengthOut.size();++index){
1370 // if(type=="linear"){
1371 // if(wavelengthOut[index]<wavelengthIn.back())
1372 // output[isample].push_back(*(input.begin()));
1373 // else if(wavelengthOut[index]>wavelengthIn.back())
1374 // output[isample].push_back(input.back());
1375 // }
1376 // else{
1377 // double dout=evalSpline(spline,wavelengthOut[index],acc);
1378 // output[isample].push_back(dout);
1379 // }
1380 // }
1381 // }
1382 // gsl_spline_free(spline);
1383 // gsl_interp_accel_free(acc);
1384 // }
1385 
1386 //todo: nodata?
1387 template<class T> void StatFactory::interpolateUp(const std::vector<T>& input, std::vector<T>& output, int nbin) const
1388 {
1389  if(input.empty()){
1390  std::ostringstream s;
1391  s<<"Error: input is empty";
1392  throw(s.str());
1393  }
1394  if(!nbin){
1395  std::ostringstream s;
1396  s<<"Error: nbin must be larger than 0";
1397  throw(s.str());
1398  }
1399  output.clear();
1400  int dim=input.size();
1401  for(int i=0;i<dim;++i){
1402  double deltaX=0;
1403  double left=input[i];
1404  if(i<dim-1){
1405  double right=(i<dim-1)? input[i+1]:input[i];
1406  deltaX=(right-left)/static_cast<double>(nbin);
1407  for(int x=0;x<nbin;++x){
1408  output.push_back(left+x*deltaX);
1409  }
1410  }
1411  else
1412  output.push_back(input.back());
1413  }
1414 }
1415 
1416 //todo: nodata?
1417 template<class T> void StatFactory::nearUp(const std::vector<T>& input, std::vector<T>& output) const
1418 {
1419  if(input.empty()){
1420  std::ostringstream s;
1421  s<<"Error: input is empty";
1422  throw(s.str());
1423  }
1424  if(output.size()<input.size()){
1425  std::ostringstream s;
1426  s<<"Error: output size is smaller than input size";
1427  throw(s.str());
1428  }
1429  int dimInput=input.size();
1430  int dimOutput=output.size();
1431 
1432  for(int iin=0;iin<dimInput;++iin){
1433  for(int iout=0;iout<dimOutput/dimInput;++iout){
1434  int indexOutput=iin*dimOutput/dimInput+iout;
1435  if(indexOutput>=output.size()){
1436  std::ostringstream s;
1437  s<<"Error: indexOutput must be smaller than output.size()";
1438  throw(s.str());
1439  }
1440  output[indexOutput]=input[iin];
1441  }
1442  }
1443 }
1444 
1445 //todo: nodata?
1446 template<class T> void StatFactory::interpolateUp(double* input, int dim, std::vector<T>& output, int nbin)
1447 {
1448  if(!nbin){
1449  std::ostringstream s;
1450  s<<"Error: nbin must be larger than 0";
1451  throw(s.str());
1452  }
1453  output.clear();
1454  for(int i=0;i<dim;++i){
1455  double deltaX=0;
1456  double left=input[i];
1457  if(i<dim-1){
1458  double right=(i<dim-1)? input[i+1]:input[i];
1459  deltaX=(right-left)/static_cast<double>(nbin);
1460  for(int x=0;x<nbin;++x){
1461  output.push_back(left+x*deltaX);
1462  }
1463  }
1464  else
1465  output.push_back(input[dim-1]);
1466  }
1467 }
1468 
1469 //todo: nodata?
1470 template<class T> void StatFactory::interpolateDown(const std::vector<T>& input, std::vector<T>& output, int nbin) const
1471 {
1472  if(input.empty()){
1473  std::ostringstream s;
1474  s<<"Error: input is empty";
1475  throw(s.str());
1476  }
1477  if(!nbin){
1478  std::ostringstream s;
1479  s<<"Error: nbin must be larger than 0";
1480  throw(s.str());
1481  }
1482  output.clear();
1483  int dim=input.size();
1484  int x=0;
1485  output.push_back(input[0]);
1486  for(int i=1;i<dim;++i){
1487  if(i%nbin)
1488  continue;
1489  else{
1490  x=(i-1)/nbin+1;
1491  output.push_back(input[i]);
1492  }
1493  }
1494 }
1495 
1496 //todo: nodata?
1497 template<class T> void StatFactory::interpolateDown(double* input, int dim, std::vector<T>& output, int nbin)
1498 {
1499  if(!nbin){
1500  std::ostringstream s;
1501  s<<"Error: nbin must be larger than 0";
1502  throw(s.str());
1503  }
1504  output.clear();
1505  int x=0;
1506  output.push_back(input[0]);
1507  for(int i=1;i<dim;++i){
1508  if(i%nbin)
1509  continue;
1510  else{
1511  x=(i-1)/nbin+1;
1512  output.push_back(input[i]);
1513  }
1514  }
1515 }
1516 }
1517 
1518 #endif /* _STATFACTORY_H_ */
1519 
1520 // void Histogram::signature(double m1, double m2, double& k, double& alpha, double& beta, double e)
1521 // {
1522 // double y=m1*m1/m2;
1523 // beta=F_1(y,0.1,10.0,e);
1524 // double fb=F(beta);
1525 // double g=exp(lgamma(1.0/beta));
1526 // alpha=m1*g/exp(lgamma(2.0/beta));
1527 // k=beta/(2*alpha*g);
1528 // // std::cout << "y, alpha, beta: " << y << ", " << alpha << ", " << beta << std::endl;
1529 // }
1530 
1531 // double Histogram::F(double x)
1532 // {
1533 // double g2=exp(lgamma(2.0/x));
1534 // return(g2*g2/exp(lgamma(3.0/x))/exp(lgamma(1.0/x)));
1535 // }
1536 
1537 // //x1 is under estimate, x2 is over estimate, e is error
1538 // double Histogram::F_1(double y, double x1, double x2, double e)
1539 // {
1540 // double f1=F(x1);
1541 // double f2=F(x2);
1542 // assert(f1!=f2);
1543 // double x=x1+(x2-x1)*(y-f1)/(f2-f1);
1544 // double f=F(x);
1545 // while(f-y>=e||y-f>=e){
1546 // if(f<y)
1547 // x1=x;
1548 // else
1549 // x2=x;
1550 // if(x1==x2)
1551 // return x1;
1552 // assert(f1!=f2);
1553 // x=x1+(x2-x1)*(y-f1)/(f2-f1);
1554 // f=F(x);
1555 // }
1556 // return x;
1557 // }