pktools  2.6.4
Processing Kernel for geospatial data
pkstat.cc
1 /**********************************************************************
2 pkstat.cc: program to calculate basic statistics from raster dataset
3 Copyright (C) 2008-2015 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #include <iostream>
21 #include <fstream>
22 #include <math.h>
23 #include "base/Optionpk.h"
24 #include "algorithms/StatFactory.h"
25 #include "algorithms/ImgRegression.h"
26 /******************************************************************************/
78 using namespace std;
79 
80 int main(int argc, char *argv[])
81 {
82  Optionpk<string> input_opt("i","input","name of the input raster dataset");
83  Optionpk<unsigned short> band_opt("b","band","band(s) on which to calculate statistics",0);
84  Optionpk<bool> filename_opt("f", "filename", "Shows image filename ", false);
85  Optionpk<bool> stat_opt("stats", "statistics", "Shows basic statistics (calculate in memory) (min,max, mean and stdDev of the raster datasets)", false);
86  Optionpk<bool> fstat_opt("fstats", "fstatistics", "Shows basic statistics using GDAL computeStatistics (min,max, mean and stdDev of the raster datasets)", false);
87  Optionpk<double> ulx_opt("ulx", "ulx", "Upper left x value bounding box");
88  Optionpk<double> uly_opt("uly", "uly", "Upper left y value bounding box");
89  Optionpk<double> lrx_opt("lrx", "lrx", "Lower right x value bounding box");
90  Optionpk<double> lry_opt("lry", "lry", "Lower right y value bounding box");
91  Optionpk<double> nodata_opt("nodata","nodata","Set nodata value(s)");
92  Optionpk<short> down_opt("down", "down", "Down sampling factor (for raster sample datasets only). Can be used to create grid points", 1);
93  Optionpk<unsigned int> random_opt("rnd", "rnd", "generate random numbers", 0);
94  Optionpk<double> scale_opt("scale", "scale", "Scale(s) for reading input image(s)");
95  Optionpk<double> offset_opt("offset", "offset", "Offset(s) for reading input image(s)");
96 
97  // Optionpk<bool> transpose_opt("t","transpose","transpose output",false);
98  // Optionpk<std::string> randdist_opt("dist", "dist", "distribution for generating random numbers, see http://www.gnu.org/software/gsl/manual/gsl-ref_toc.html#TOC320 (only uniform and Gaussian supported yet)", "gaussian");
99  // Optionpk<double> randa_opt("rnda", "rnda", "first parameter for random distribution (mean value in case of Gaussian)", 0);
100  // Optionpk<double> randb_opt("rndb", "rndb", "second parameter for random distribution (standard deviation in case of Gaussian)", 1);
101  Optionpk<bool> mean_opt("mean","mean","calculate mean",false);
102  Optionpk<bool> median_opt("median","median","calculate median",false);
103  Optionpk<bool> var_opt("var","var","calculate variance",false);
104  Optionpk<bool> skewness_opt("skew","skewness","calculate skewness",false);
105  Optionpk<bool> kurtosis_opt("kurt","kurtosis","calculate kurtosis",false);
106  Optionpk<bool> stdev_opt("stdev","stdev","calculate standard deviation",false);
107  Optionpk<bool> sum_opt("sum","sum","calculate sum of column",false);
108  Optionpk<bool> minmax_opt("mm","minmax","calculate minimum and maximum value",false);
109  Optionpk<bool> min_opt("min","min","calculate minimum value",false);
110  Optionpk<bool> max_opt("max","max","calculate maximum value",false);
111  Optionpk<double> src_min_opt("src_min","src_min","start reading source from this minimum value");
112  Optionpk<double> src_max_opt("src_max","src_max","stop reading source from this maximum value");
113  Optionpk<bool> histogram_opt("hist","hist","calculate histogram",false);
114  Optionpk<bool> histogram2d_opt("hist2d","hist2d","calculate 2-dimensional histogram based on two images",false);
115  Optionpk<short> nbin_opt("nbin","nbin","number of bins to calculate histogram");
116  Optionpk<bool> relative_opt("rel","relative","use percentiles for histogram to calculate histogram",false);
117  Optionpk<bool> kde_opt("kde","kde","Use Kernel density estimation when producing histogram. The standard deviation is estimated based on Silverman's rule of thumb",false);
118  Optionpk<bool> rmse_opt("rmse","rmse","calculate root mean square error between two raster datasets",false);
119  Optionpk<bool> reg_opt("reg","regression","calculate linear regression between two raster datasets and get correlation coefficient",false);
120  Optionpk<bool> regerr_opt("regerr","regerr","calculate linear regression between two raster datasets and get root mean square error",false);
121  Optionpk<bool> preg_opt("preg","preg","calculate perpendicular regression between two raster datasets and get correlation coefficient",false);
122  Optionpk<short> verbose_opt("v", "verbose", "verbose mode when positive", 0,2);
123  fstat_opt.setHide(1);
124  ulx_opt.setHide(1);
125  uly_opt.setHide(1);
126  lrx_opt.setHide(1);
127  lry_opt.setHide(1);
128  down_opt.setHide(1);
129  random_opt.setHide(1);
130  scale_opt.setHide(1);
131  offset_opt.setHide(1);
132  src_min_opt.setHide(1);
133  src_max_opt.setHide(1);
134  kde_opt.setHide(1);
135 
136  // range_opt.setHide(1);
137  // transpose_opt.setHide(1);
138 
139  bool doProcess;//stop process when program was invoked with help option (-h --help)
140  try{
141  //mandatory options
142  doProcess=input_opt.retrieveOption(argc,argv);
143  //optional options
144  band_opt.retrieveOption(argc,argv);
145  filename_opt.retrieveOption(argc,argv);
146  stat_opt.retrieveOption(argc,argv);
147  fstat_opt.retrieveOption(argc,argv);
148  nodata_opt.retrieveOption(argc,argv);
149  mean_opt.retrieveOption(argc,argv);
150  median_opt.retrieveOption(argc,argv);
151  var_opt.retrieveOption(argc,argv);
152  stdev_opt.retrieveOption(argc,argv);
153  minmax_opt.retrieveOption(argc,argv);
154  min_opt.retrieveOption(argc,argv);
155  max_opt.retrieveOption(argc,argv);
156  histogram_opt.retrieveOption(argc,argv);
157  nbin_opt.retrieveOption(argc,argv);
158  relative_opt.retrieveOption(argc,argv);
159  histogram2d_opt.retrieveOption(argc,argv);
160  rmse_opt.retrieveOption(argc,argv);
161  reg_opt.retrieveOption(argc,argv);
162  regerr_opt.retrieveOption(argc,argv);
163  preg_opt.retrieveOption(argc,argv);
164  //advanced options
165  ulx_opt.retrieveOption(argc,argv);
166  uly_opt.retrieveOption(argc,argv);
167  lrx_opt.retrieveOption(argc,argv);
168  lry_opt.retrieveOption(argc,argv);
169  down_opt.retrieveOption(argc,argv);
170  random_opt.retrieveOption(argc,argv);
171  scale_opt.retrieveOption(argc,argv);
172  offset_opt.retrieveOption(argc,argv);
173  src_min_opt.retrieveOption(argc,argv);
174  src_max_opt.retrieveOption(argc,argv);
175  kde_opt.retrieveOption(argc,argv);
176  verbose_opt.retrieveOption(argc,argv);
177  }
178  catch(string predefinedString){
179  std::cout << predefinedString << std::endl;
180  exit(0);
181  }
182  if(!doProcess){
183  cout << endl;
184  cout << "Usage: pkstat -i input" << endl;
185  cout << endl;
186  std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
187  exit(0);//help was invoked, stop processing
188  }
189 
190  if(src_min_opt.size()){
191  while(src_min_opt.size()<band_opt.size())
192  src_min_opt.push_back(src_min_opt[0]);
193  }
194  if(src_max_opt.size()){
195  while(src_max_opt.size()<band_opt.size())
196  src_max_opt.push_back(src_max_opt[0]);
197  }
198 
199  unsigned int nbin=0;
200  double minX=0;
201  double minY=0;
202  double maxX=0;
203  double maxY=0;
204  double minValue=0;
205  double maxValue=0;
206  double meanValue=0;
207  double stdDev=0;
208 
209  const char* pszMessage;
210  void* pProgressArg=NULL;
211  GDALProgressFunc pfnProgress=GDALTermProgress;
212  double progress=0;
213  srand(time(NULL));
214 
217  std::vector<double> histogramOutput;
218  double nsample=0;
219 
220  ImgReaderGdal imgReader;
221 
222  if(scale_opt.size()){
223  while(scale_opt.size()<input_opt.size())
224  scale_opt.push_back(scale_opt[0]);
225  }
226  if(offset_opt.size()){
227  while(offset_opt.size()<input_opt.size())
228  offset_opt.push_back(offset_opt[0]);
229  }
230  if(input_opt.empty()){
231  std::cerr << "No image dataset provided (use option -i). Use --help for help information";
232  exit(0);
233  }
234  for(int ifile=0;ifile<input_opt.size();++ifile){
235  try{
236  imgReader.open(input_opt[ifile]);
237  }
238  catch(std::string errorstring){
239  std::cout << errorstring << std::endl;
240  exit(0);
241  }
242 
243  if(filename_opt[0])
244  std::cout << " --input " << input_opt[ifile] << " ";
245 
246  for(int inodata=0;inodata<nodata_opt.size();++inodata)
247  imgReader.pushNoDataValue(nodata_opt[inodata]);
248 
249  int nband=band_opt.size();
250  for(int iband=0;iband<nband;++iband){
251 
252  for(int inodata=0;inodata<nodata_opt.size();++inodata){
253  if(!inodata)
254  imgReader.GDALSetNoDataValue(nodata_opt[0],band_opt[iband]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
255  }
256 
257  if(offset_opt.size()>ifile)
258  imgReader.setOffset(offset_opt[ifile],band_opt[iband]);
259  if(scale_opt.size()>ifile)
260  imgReader.setScale(scale_opt[ifile],band_opt[iband]);
261 
262  if(stat_opt[0]||mean_opt[0]||var_opt[0]||stdev_opt[0]){//the hard way (in memory)
264  vector<double> readBuffer;
265  double varValue;
266  imgReader.readDataBlock(readBuffer, GDT_Float64, 0, imgReader.nrOfCol()-1, 0, imgReader.nrOfRow()-1, band_opt[0]);
267  stat.setNoDataValues(nodata_opt);
268  stat.meanVar(readBuffer,meanValue,varValue);
269  stat.minmax(readBuffer,readBuffer.begin(),readBuffer.end(),minValue,maxValue);
270  if(mean_opt[0])
271  std::cout << "--mean " << meanValue << " ";
272  if(stdev_opt[0])
273  std::cout << "--stdDev " << sqrt(varValue) << " ";
274  if(var_opt[0])
275  std::cout << "--var " << varValue << " ";
276  if(stat_opt[0])
277  std::cout << "-min " << minValue << " -max " << maxValue << " --mean " << meanValue << " --stdDev " << sqrt(varValue) << " ";
278  }
279 
280  if(fstat_opt[0]){//the fast way
281  assert(band_opt[iband]<imgReader.nrOfBand());
282  GDALProgressFunc pfnProgress;
283  void* pProgressData;
284  GDALRasterBand* rasterBand;
285  rasterBand=imgReader.getRasterBand(band_opt[iband]);
286  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev,pfnProgress,pProgressData);
287 
288  std::cout << "-min " << minValue << " -max " << maxValue << " --mean " << meanValue << " --stdDev " << stdDev << " ";
289  }
290 
291  if(minmax_opt[0]||min_opt[0]||max_opt[0]){
292  assert(band_opt[iband]<imgReader.nrOfBand());
293 
294  if((ulx_opt.size()||uly_opt.size()||lrx_opt.size()||lry_opt.size())&&(imgReader.covers(ulx_opt[0],uly_opt[0],lrx_opt[0],lry_opt[0]))){
295  double uli,ulj,lri,lrj;
296  imgReader.geo2image(ulx_opt[0],uly_opt[0],uli,ulj);
297  imgReader.geo2image(lrx_opt[0],lry_opt[0],lri,lrj);
298  imgReader.getMinMax(static_cast<int>(uli),static_cast<int>(lri),static_cast<int>(ulj),static_cast<int>(lrj),band_opt[iband],minValue,maxValue);
299  }
300  else{
301  imgReader.getMinMax(minValue,maxValue,band_opt[iband],true);
302  }
303  if(minmax_opt[0])
304  std::cout << "-min " << minValue << " -max " << maxValue << " ";
305  else{
306  if(min_opt[0])
307  std::cout << "-min " << minValue << " ";
308  if(max_opt[0])
309  std::cout << "-max " << maxValue << " ";
310  }
311  }
312  }
313  if(histogram_opt[0]){//aggregate results from multiple inputs, but only calculate for first selected band
314  assert(band_opt[0]<imgReader.nrOfBand());
315  nbin=(nbin_opt.size())? nbin_opt[0]:0;
316 
317  imgReader.getMinMax(minValue,maxValue,band_opt[0]);
318  if(src_min_opt.size())
319  minValue=src_min_opt[0];
320  if(src_max_opt.size())
321  maxValue=src_max_opt[0];
322  if(minValue>=maxValue)
323  imgReader.getMinMax(minValue,maxValue,band_opt[0]);
324 
325  if(verbose_opt[0])
326  cout << "number of valid pixels in image: " << imgReader.getNvalid(band_opt[0]) << endl;
327 
328  nsample+=imgReader.getHistogram(histogramOutput,minValue,maxValue,nbin,band_opt[0],kde_opt[0]);
329 
330  //only output for last input file
331  if(ifile==input_opt.size()-1){
332  std::cout.precision(10);
333  for(int bin=0;bin<nbin;++bin){
334  double binValue=0;
335  if(nbin==maxValue-minValue+1)
336  binValue=minValue+bin;
337  else
338  binValue=minValue+static_cast<double>(maxValue-minValue)*(bin+0.5)/nbin;
339  std::cout << binValue << " ";
340  if(relative_opt[0]||kde_opt[0])
341  std::cout << 100.0*static_cast<double>(histogramOutput[bin])/static_cast<double>(nsample) << std::endl;
342  else
343  std::cout << static_cast<double>(histogramOutput[bin]) << std::endl;
344  }
345  }
346  }
347  if(histogram2d_opt[0]&&input_opt.size()<2){
348  assert(band_opt.size()>1);
349  imgReader.getMinMax(minX,maxX,band_opt[0]);
350  imgReader.getMinMax(minY,maxY,band_opt[1]);
351  if(src_min_opt.size()){
352  minX=src_min_opt[0];
353  minY=src_min_opt[1];
354  }
355  if(src_max_opt.size()){
356  maxX=src_max_opt[0];
357  maxY=src_max_opt[1];
358  }
359  nbin=(nbin_opt.size())? nbin_opt[0]:0;
360  if(nbin<=1){
361  std::cerr << "Warning: number of bins not defined, calculating bins from min and max value" << std::endl;
362  if(minX>=maxX)
363  imgReader.getMinMax(minX,maxX,band_opt[0]);
364  if(minY>=maxY)
365  imgReader.getMinMax(minY,maxY,band_opt[1]);
366 
367  minValue=(minX<minY)? minX:minY;
368  maxValue=(maxX>maxY)? maxX:maxY;
369  if(verbose_opt[0])
370  std::cout << "min and max values: " << minValue << ", " << maxValue << std::endl;
371  nbin=maxValue-minValue+1;
372  }
373  assert(nbin>1);
374  double sigma=0;
375  //kernel density estimation as in http://en.wikipedia.org/wiki/Kernel_density_estimation
376  if(kde_opt[0]){
377  assert(band_opt[0]<imgReader.nrOfBand());
378  assert(band_opt[1]<imgReader.nrOfBand());
379  GDALProgressFunc pfnProgress;
380  void* pProgressData;
381  GDALRasterBand* rasterBand;
382  double stdDev1=0;
383  double stdDev2=0;
384  rasterBand=imgReader.getRasterBand(band_opt[0]);
385  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev1,pfnProgress,pProgressData);
386  rasterBand=imgReader.getRasterBand(band_opt[1]);
387  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev2,pfnProgress,pProgressData);
388 
389  double estimatedSize=1.0*imgReader.getNvalid(band_opt[0])/down_opt[0]/down_opt[0];
390  if(random_opt[0]>0)
391  estimatedSize*=random_opt[0]/100.0;
392  sigma=1.06*sqrt(stdDev1*stdDev2)*pow(estimatedSize,-0.2);
393  }
394  assert(nbin);
395  if(verbose_opt[0]){
396  if(sigma>0)
397  std::cout << "calculating 2d kernel density estimate with sigma " << sigma << " for bands " << band_opt[0] << " and " << band_opt[1] << std::endl;
398  else
399  std::cout << "calculating 2d histogram for bands " << band_opt[0] << " and " << band_opt[1] << std::endl;
400  std::cout << "nbin: " << nbin << std::endl;
401  }
402 
403 
404  vector< vector<double> > output;
405 
406  if(maxX<=minX)
407  imgReader.getMinMax(minX,maxX,band_opt[0]);
408  if(maxY<=minY)
409  imgReader.getMinMax(minY,maxY,band_opt[1]);
410 
411  if(maxX<=minX){
412  std::ostringstream s;
413  s<<"Error: could not calculate distribution (minX>=maxX)";
414  throw(s.str());
415  }
416  if(maxY<=minY){
417  std::ostringstream s;
418  s<<"Error: could not calculate distribution (minY>=maxY)";
419  throw(s.str());
420  }
421  output.resize(nbin);
422  for(int i=0;i<nbin;++i){
423  output[i].resize(nbin);
424  for(int j=0;j<nbin;++j)
425  output[i][j]=0;
426  }
427  int binX=0;
428  int binY=0;
429  vector<double> inputX(imgReader.nrOfCol());
430  vector<double> inputY(imgReader.nrOfCol());
431  unsigned long int nvalid=0;
432  for(int irow=0;irow<imgReader.nrOfRow();++irow){
433  if(irow%down_opt[0])
434  continue;
435  imgReader.readData(inputX,GDT_Float64,irow,band_opt[0]);
436  imgReader.readData(inputY,GDT_Float64,irow,band_opt[1]);
437  for(int icol=0;icol<imgReader.nrOfCol();++icol){
438  if(icol%down_opt[0])
439  continue;
440  if(random_opt[0]>0){
441  double p=static_cast<double>(rand())/(RAND_MAX);
442  p*=100.0;
443  if(p>random_opt[0])
444  continue;//do not select for now, go to next column
445  }
446  if(imgReader.isNoData(inputX[icol]))
447  continue;
448  if(imgReader.isNoData(inputY[icol]))
449  continue;
450  ++nvalid;
451  if(inputX[icol]>=maxX)
452  binX=nbin-1;
453  else if(inputX[icol]<=minX)
454  binX=0;
455  else
456  binX=static_cast<int>(static_cast<double>(inputX[icol]-minX)/(maxX-minX)*nbin);
457  if(inputY[icol]>=maxY)
458  binY=nbin-1;
459  else if(inputY[icol]<=minX)
460  binY=0;
461  else
462  binY=static_cast<int>(static_cast<double>(inputY[icol]-minY)/(maxY-minY)*nbin);
463  assert(binX>=0);
464  assert(binX<output.size());
465  assert(binY>=0);
466  assert(binY<output[binX].size());
467  if(sigma>0){
468  //create kde for Gaussian basis function
469  //todo: speed up by calculating first and last bin with non-zero contribution...
470  for(int ibinX=0;ibinX<nbin;++ibinX){
471  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
472  double pdfX=gsl_ran_gaussian_pdf(inputX[icol]-centerX, sigma);
473  for(int ibinY=0;ibinY<nbin;++ibinY){
474  //calculate \integral_ibinX^(ibinX+1)
475  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
476  double pdfY=gsl_ran_gaussian_pdf(inputY[icol]-centerY, sigma);
477  output[ibinX][binY]+=pdfX*pdfY;
478  }
479  }
480  }
481  else
482  ++output[binX][binY];
483  }
484  }
485  if(verbose_opt[0])
486  cout << "number of valid pixels: " << nvalid << endl;
487 
488  for(int binX=0;binX<nbin;++binX){
489  cout << endl;
490  for(int binY=0;binY<nbin;++binY){
491  double binValueX=0;
492  if(nbin==maxX-minX+1)
493  binValueX=minX+binX;
494  else
495  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
496  double binValueY=0;
497  if(nbin==maxY-minY+1)
498  binValueY=minY+binY;
499  else
500  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
501 
502  double value=static_cast<double>(output[binX][binY]);
503 
504  if(relative_opt[0])
505  value*=100.0/nvalid;
506 
507  cout << binValueX << " " << binValueY << " " << value << std::endl;
508  // double value=static_cast<double>(output[binX][binY])/nvalid;
509  // cout << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl;
510  }
511  }
512  }
513  if(reg_opt[0]&&input_opt.size()<2){
514  if(band_opt.size()<2)
515  continue;
516  imgreg.setDown(down_opt[0]);
517  imgreg.setThreshold(random_opt[0]);
518  double c0=0;//offset
519  double c1=1;//scale
520  double r2=imgreg.getR2(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
521  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
522  }
523  if(regerr_opt[0]&&input_opt.size()<2){
524  if(band_opt.size()<2)
525  continue;
526  imgreg.setDown(down_opt[0]);
527  imgreg.setThreshold(random_opt[0]);
528  double c0=0;//offset
529  double c1=1;//scale
530  double err=imgreg.getRMSE(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
531  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -rmse " << err << std::endl;
532  }
533  if(rmse_opt[0]&&input_opt.size()<2){
534  if(band_opt.size()<2)
535  continue;
536  imgreg.setDown(down_opt[0]);
537  imgreg.setThreshold(random_opt[0]);
538  double c0=0;//offset
539  double c1=1;//scale
540  double err=imgreg.getRMSE(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
541  std::cout << " -rmse " << err << std::endl;
542  }
543  if(preg_opt[0]&&input_opt.size()<2){
544  if(band_opt.size()<2)
545  continue;
546  imgreg.setDown(down_opt[0]);
547  imgreg.setThreshold(random_opt[0]);
548  double c0=0;//offset
549  double c1=1;//scale
550  double r2=imgreg.pgetR2(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
551  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
552  }
553  imgReader.close();
554  }
555  if(reg_opt[0]&&(input_opt.size()>1)){
556  imgreg.setDown(down_opt[0]);
557  imgreg.setThreshold(random_opt[0]);
558  double c0=0;//offset
559  double c1=1;//scale
560  while(band_opt.size()<input_opt.size())
561  band_opt.push_back(band_opt[0]);
562  if(src_min_opt.size()){
563  while(src_min_opt.size()<input_opt.size())
564  src_min_opt.push_back(src_min_opt[0]);
565  }
566  if(src_max_opt.size()){
567  while(src_max_opt.size()<input_opt.size())
568  src_max_opt.push_back(src_max_opt[0]);
569  }
570  ImgReaderGdal imgReader1(input_opt[0]);
571  ImgReaderGdal imgReader2(input_opt[1]);
572 
573  if(offset_opt.size())
574  imgReader1.setOffset(offset_opt[0],band_opt[0]);
575  if(scale_opt.size())
576  imgReader1.setScale(scale_opt[0],band_opt[0]);
577  if(offset_opt.size()>1)
578  imgReader2.setOffset(offset_opt[1],band_opt[1]);
579  if(scale_opt.size()>1)
580  imgReader2.setScale(scale_opt[1],band_opt[1]);
581 
582  for(int inodata=0;inodata<nodata_opt.size();++inodata){
583  if(!inodata){
584  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
585  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
586  }
587  imgReader1.pushNoDataValue(nodata_opt[inodata]);
588  imgReader2.pushNoDataValue(nodata_opt[inodata]);
589  }
590 
591  double r2=imgreg.getR2(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
592  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
593  imgReader1.close();
594  imgReader2.close();
595  }
596  if(preg_opt[0]&&(input_opt.size()>1)){
597  imgreg.setDown(down_opt[0]);
598  imgreg.setThreshold(random_opt[0]);
599  double c0=0;//offset
600  double c1=1;//scale
601  while(band_opt.size()<input_opt.size())
602  band_opt.push_back(band_opt[0]);
603  if(src_min_opt.size()){
604  while(src_min_opt.size()<input_opt.size())
605  src_min_opt.push_back(src_min_opt[0]);
606  }
607  if(src_max_opt.size()){
608  while(src_max_opt.size()<input_opt.size())
609  src_max_opt.push_back(src_max_opt[0]);
610  }
611  ImgReaderGdal imgReader1(input_opt[0]);
612  ImgReaderGdal imgReader2(input_opt[1]);
613 
614  if(offset_opt.size())
615  imgReader1.setOffset(offset_opt[0],band_opt[0]);
616  if(scale_opt.size())
617  imgReader1.setScale(scale_opt[0],band_opt[0]);
618  if(offset_opt.size()>1)
619  imgReader2.setOffset(offset_opt[1],band_opt[1]);
620  if(scale_opt.size()>1)
621  imgReader2.setScale(scale_opt[1],band_opt[1]);
622 
623  for(int inodata=0;inodata<nodata_opt.size();++inodata){
624  if(!inodata){
625  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
626  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
627  }
628  imgReader1.pushNoDataValue(nodata_opt[inodata]);
629  imgReader2.pushNoDataValue(nodata_opt[inodata]);
630  }
631 
632  double r2=imgreg.pgetR2(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
633  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
634  imgReader1.close();
635  imgReader2.close();
636  }
637  if(regerr_opt[0]&&(input_opt.size()>1)){
638  imgreg.setDown(down_opt[0]);
639  imgreg.setThreshold(random_opt[0]);
640  double c0=0;//offset
641  double c1=1;//scale
642  while(band_opt.size()<input_opt.size())
643  band_opt.push_back(band_opt[0]);
644  if(src_min_opt.size()){
645  while(src_min_opt.size()<input_opt.size())
646  src_min_opt.push_back(src_min_opt[0]);
647  }
648  if(src_max_opt.size()){
649  while(src_max_opt.size()<input_opt.size())
650  src_max_opt.push_back(src_max_opt[0]);
651  }
652  ImgReaderGdal imgReader1(input_opt[0]);
653  ImgReaderGdal imgReader2(input_opt[1]);
654 
655  if(offset_opt.size())
656  imgReader1.setOffset(offset_opt[0],band_opt[0]);
657  if(scale_opt.size())
658  imgReader1.setScale(scale_opt[0],band_opt[0]);
659  if(offset_opt.size()>1)
660  imgReader2.setOffset(offset_opt[1],band_opt[1]);
661  if(scale_opt.size()>1)
662  imgReader2.setScale(scale_opt[1],band_opt[1]);
663 
664  for(int inodata=0;inodata<nodata_opt.size();++inodata){
665  if(!inodata){
666  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
667  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
668  }
669  imgReader1.pushNoDataValue(nodata_opt[inodata]);
670  imgReader2.pushNoDataValue(nodata_opt[inodata]);
671  }
672 
673  double err=imgreg.getRMSE(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
674  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -rmse " << err << std::endl;
675  imgReader1.close();
676  imgReader2.close();
677  }
678  if(rmse_opt[0]&&(input_opt.size()>1)){
679  imgreg.setDown(down_opt[0]);
680  imgreg.setThreshold(random_opt[0]);
681  double c0=0;//offset
682  double c1=1;//scale
683  while(band_opt.size()<input_opt.size())
684  band_opt.push_back(band_opt[0]);
685  if(src_min_opt.size()){
686  while(src_min_opt.size()<input_opt.size())
687  src_min_opt.push_back(src_min_opt[0]);
688  }
689  if(src_max_opt.size()){
690  while(src_max_opt.size()<input_opt.size())
691  src_max_opt.push_back(src_max_opt[0]);
692  }
693  ImgReaderGdal imgReader1(input_opt[0]);
694  ImgReaderGdal imgReader2(input_opt[1]);
695 
696  if(offset_opt.size())
697  imgReader1.setOffset(offset_opt[0],band_opt[0]);
698  if(scale_opt.size())
699  imgReader1.setScale(scale_opt[0],band_opt[0]);
700  if(offset_opt.size()>1)
701  imgReader2.setOffset(offset_opt[1],band_opt[1]);
702  if(scale_opt.size()>1)
703  imgReader2.setScale(scale_opt[1],band_opt[1]);
704 
705  for(int inodata=0;inodata<nodata_opt.size();++inodata){
706  if(!inodata){
707  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
708  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
709  }
710  imgReader1.pushNoDataValue(nodata_opt[inodata]);
711  imgReader2.pushNoDataValue(nodata_opt[inodata]);
712  }
713 
714  double err=imgreg.getRMSE(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
715  std::cout << "-rmse " << err << std::endl;
716  imgReader1.close();
717  imgReader2.close();
718  }
719  if(histogram2d_opt[0]&&(input_opt.size()>1)){
720  while(band_opt.size()<input_opt.size())
721  band_opt.push_back(band_opt[0]);
722  if(src_min_opt.size()){
723  while(src_min_opt.size()<input_opt.size())
724  src_min_opt.push_back(src_min_opt[0]);
725  }
726  if(src_max_opt.size()){
727  while(src_max_opt.size()<input_opt.size())
728  src_max_opt.push_back(src_max_opt[0]);
729  }
730  ImgReaderGdal imgReader1(input_opt[0]);
731  ImgReaderGdal imgReader2(input_opt[1]);
732 
733  if(offset_opt.size())
734  imgReader1.setOffset(offset_opt[0],band_opt[0]);
735  if(scale_opt.size())
736  imgReader1.setScale(scale_opt[0],band_opt[0]);
737  if(offset_opt.size()>1)
738  imgReader2.setOffset(offset_opt[1],band_opt[1]);
739  if(scale_opt.size()>1)
740  imgReader2.setScale(scale_opt[1],band_opt[1]);
741 
742  for(int inodata=0;inodata<nodata_opt.size();++inodata){
743  if(!inodata){
744  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
745  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
746  }
747  imgReader1.pushNoDataValue(nodata_opt[inodata]);
748  imgReader2.pushNoDataValue(nodata_opt[inodata]);
749  }
750 
751  imgReader1.getMinMax(minX,maxX,band_opt[0]);
752  imgReader2.getMinMax(minY,maxY,band_opt[1]);
753 
754  if(verbose_opt[0]){
755  cout << "minX: " << minX << endl;
756  cout << "maxX: " << maxX << endl;
757  cout << "minY: " << minY << endl;
758  cout << "maxY: " << maxY << endl;
759  }
760 
761  if(src_min_opt.size()){
762  minX=src_min_opt[0];
763  minY=src_min_opt[1];
764  }
765  if(src_max_opt.size()){
766  maxX=src_max_opt[0];
767  maxY=src_max_opt[1];
768  }
769 
770  nbin=(nbin_opt.size())? nbin_opt[0]:0;
771  if(nbin<=1){
772  std::cerr << "Warning: number of bins not defined, calculating bins from min and max value" << std::endl;
773  // imgReader1.getMinMax(minX,maxX,band_opt[0]);
774  // imgReader2.getMinMax(minY,maxY,band_opt[0]);
775  if(minX>=maxX)
776  imgReader1.getMinMax(minX,maxX,band_opt[0]);
777  if(minY>=maxY)
778  imgReader2.getMinMax(minY,maxY,band_opt[1]);
779 
780  minValue=(minX<minY)? minX:minY;
781  maxValue=(maxX>maxY)? maxX:maxY;
782  if(verbose_opt[0])
783  std::cout << "min and max values: " << minValue << ", " << maxValue << std::endl;
784  nbin=maxValue-minValue+1;
785  }
786  assert(nbin>1);
787  double sigma=0;
788  //kernel density estimation as in http://en.wikipedia.org/wiki/Kernel_density_estimation
789  if(kde_opt[0]){
790  GDALProgressFunc pfnProgress;
791  void* pProgressData;
792  GDALRasterBand* rasterBand;
793  double stdDev1=0;
794  double stdDev2=0;
795  rasterBand=imgReader1.getRasterBand(band_opt[0]);
796  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev1,pfnProgress,pProgressData);
797  rasterBand=imgReader2.getRasterBand(band_opt[0]);
798  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev2,pfnProgress,pProgressData);
799 
800  //todo: think of smarter way how to estimate size (nodata!)
801  double estimatedSize=1.0*imgReader.getNvalid(band_opt[0])/down_opt[0]/down_opt[0];
802  if(random_opt[0]>0)
803  estimatedSize*=random_opt[0]/100.0;
804  sigma=1.06*sqrt(stdDev1*stdDev2)*pow(estimatedSize,-0.2);
805  }
806  assert(nbin);
807  if(verbose_opt[0]){
808  if(sigma>0)
809  std::cout << "calculating 2d kernel density estimate with sigma " << sigma << " for datasets " << input_opt[0] << " and " << input_opt[1] << std::endl;
810  else
811  std::cout << "calculating 2d histogram for datasets " << input_opt[0] << " and " << input_opt[1] << std::endl;
812  std::cout << "nbin: " << nbin << std::endl;
813  }
814 
815  vector< vector<double> > output;
816 
817  if(maxX<=minX)
818  imgReader1.getMinMax(minX,maxX,band_opt[0]);
819  if(maxY<=minY)
820  imgReader2.getMinMax(minY,maxY,band_opt[1]);
821 
822  if(maxX<=minX){
823  std::ostringstream s;
824  s<<"Error: could not calculate distribution (minX>=maxX)";
825  throw(s.str());
826  }
827  if(maxY<=minY){
828  std::ostringstream s;
829  s<<"Error: could not calculate distribution (minY>=maxY)";
830  throw(s.str());
831  }
832  if(verbose_opt[0]){
833  cout << "minX: " << minX << endl;
834  cout << "maxX: " << maxX << endl;
835  cout << "minY: " << minY << endl;
836  cout << "maxY: " << maxY << endl;
837  }
838  output.resize(nbin);
839  for(int i=0;i<nbin;++i){
840  output[i].resize(nbin);
841  for(int j=0;j<nbin;++j)
842  output[i][j]=0;
843  }
844  int binX=0;
845  int binY=0;
846  vector<double> inputX(imgReader1.nrOfCol());
847  vector<double> inputY(imgReader2.nrOfCol());
848  double nvalid=0;
849  double geoX=0;
850  double geoY=0;
851  double icol1=0;
852  double irow1=0;
853  double icol2=0;
854  double irow2=0;
855  for(int irow=0;irow<imgReader1.nrOfRow();++irow){
856  if(irow%down_opt[0])
857  continue;
858  irow1=irow;
859  imgReader1.image2geo(icol1,irow1,geoX,geoY);
860  imgReader2.geo2image(geoX,geoY,icol2,irow2);
861  irow2=static_cast<int>(irow2);
862  imgReader1.readData(inputX,GDT_Float64,irow1,band_opt[0]);
863  imgReader2.readData(inputY,GDT_Float64,irow2,band_opt[1]);
864  for(int icol=0;icol<imgReader.nrOfCol();++icol){
865  if(icol%down_opt[0])
866  continue;
867  icol1=icol;
868  if(random_opt[0]>0){
869  double p=static_cast<double>(rand())/(RAND_MAX);
870  p*=100.0;
871  if(p>random_opt[0])
872  continue;//do not select for now, go to next column
873  }
874  if(imgReader1.isNoData(inputX[icol]))
875  continue;
876  imgReader1.image2geo(icol1,irow1,geoX,geoY);
877  imgReader2.geo2image(geoX,geoY,icol2,irow2);
878  icol2=static_cast<int>(icol2);
879  if(imgReader2.isNoData(inputY[icol2]))
880  continue;
881  // ++nvalid;
882  if(inputX[icol1]>=maxX)
883  binX=nbin-1;
884  else if(inputX[icol]<=minX)
885  binX=0;
886  else
887  binX=static_cast<int>(static_cast<double>(inputX[icol1]-minX)/(maxX-minX)*nbin);
888  if(inputY[icol2]>=maxY)
889  binY=nbin-1;
890  else if(inputY[icol2]<=minY)
891  binY=0;
892  else
893  binY=static_cast<int>(static_cast<double>(inputY[icol2]-minY)/(maxY-minY)*nbin);
894  assert(binX>=0);
895  assert(binX<output.size());
896  assert(binY>=0);
897  assert(binY<output[binX].size());
898  if(sigma>0){
899  //create kde for Gaussian basis function
900  //todo: speed up by calculating first and last bin with non-zero contribution...
901  for(int ibinX=0;ibinX<nbin;++ibinX){
902  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
903  double pdfX=gsl_ran_gaussian_pdf(inputX[icol1]-centerX, sigma);
904  for(int ibinY=0;ibinY<nbin;++ibinY){
905  //calculate \integral_ibinX^(ibinX+1)
906  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
907  double pdfY=gsl_ran_gaussian_pdf(inputY[icol2]-centerY, sigma);
908  output[ibinX][binY]+=pdfX*pdfY;
909  nvalid+=pdfX*pdfY;
910  }
911  }
912  }
913  else{
914  ++output[binX][binY];
915  ++nvalid;
916  }
917  }
918  }
919  if(verbose_opt[0])
920  cout << "number of valid pixels: " << nvalid << endl;
921  for(int binX=0;binX<nbin;++binX){
922  cout << endl;
923  for(int binY=0;binY<nbin;++binY){
924  double binValueX=0;
925  if(nbin==maxX-minX+1)
926  binValueX=minX+binX;
927  else
928  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
929  double binValueY=0;
930  if(nbin==maxY-minY+1)
931  binValueY=minY+binY;
932  else
933  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
934  double value=static_cast<double>(output[binX][binY]);
935 
936  if(relative_opt[0]||kde_opt[0])
937  value*=100.0/nvalid;
938 
939  cout << binValueX << " " << binValueY << " " << value << std::endl;
940  // double value=static_cast<double>(output[binX][binY])/nvalid;
941  // cout << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl;
942  }
943  }
944  imgReader1.close();
945  imgReader2.close();
946  }
947 
948  if(!histogram_opt[0]||histogram2d_opt[0])
949  std::cout << std::endl;
950 }
951 
952 // int nband=(band_opt.size()) ? band_opt.size() : imgReader.nrOfBand();
953 
954 // const char* pszMessage;
955 // void* pProgressArg=NULL;
956 // GDALProgressFunc pfnProgress=GDALTermProgress;
957 // double progress=0;
958 // srand(time(NULL));
959 
960 
961 // statfactory::StatFactory stat;
962 // imgregression::ImgRegression imgreg;
963 
964 // pfnProgress(progress,pszMessage,pProgressArg);
965 // for(irow=0;irow<classReader.nrOfRow();++irow){
966 // if(irow%down_opt[0])
967 // continue;
968 // // classReader.readData(classBuffer,GDT_Int32,irow);
969 // classReader.readData(classBuffer,GDT_Float64,irow);
970 // double x,y;//geo coordinates
971 // double iimg,jimg;//image coordinates in img image
972 // for(icol=0;icol<classReader.nrOfCol();++icol){
973 // if(icol%down_opt[0])
974  // continue;
975 
976 
977  // if(rand_opt[0]>0){
978  // gsl_rng* r=stat.getRandomGenerator(time(NULL));
979  // //todo: init random number generator using time...
980  // if(verbose_opt[0])
981  // std::cout << "generating " << rand_opt[0] << " random numbers: " << std::endl;
982  // for(unsigned int i=0;i<rand_opt[0];++i)
983  // std::cout << i << " " << stat.getRandomValue(r,randdist_opt[0],randa_opt[0],randb_opt[0]) << std::endl;
984  // }
985 
986  // imgreg.setDown(down_opt[0]);
987  // imgreg.setThreshold(threshold_opt[0]);
988  // double c0=0;//offset
989  // double c1=1;//scale
990  // double err=uncertNodata_opt[0];//start with high initial value in case we do not have first ob err=imgreg.getRMSE(imgReaderModel1,imgReader,c0,c1,verbose_opt[0]);
991 
992  // int nband=band_opt.size();
993  // if(band_opt[0]<0)
994  // nband=imgReader.nrOfBand();
995  // for(int iband=0;iband<nband;++iband){
996  // unsigned short band_opt[iband]=(band_opt[0]<0)? iband : band_opt[iband];
997 
998  // if(minmax_opt[0]||min_opt[0]||max_opt[0]){
999  // assert(band_opt[iband]<imgReader.nrOfBand());
1000  // if((ulx_opt.size()||uly_opt.size()||lrx_opt.size()||lry_opt.size())&&(imgReader.covers(ulx_opt[0],uly_opt[0],lrx_opt[0],lry_opt[0]))){
1001  // double uli,ulj,lri,lrj;
1002  // imgReader.geo2image(ulx_opt[0],uly_opt[0],uli,ulj);
1003  // imgReader.geo2image(lrx_opt[0],lry_opt[0],lri,lrj);
1004  // imgReader.getMinMax(static_cast<int>(uli),static_cast<int>(lri),static_cast<int>(ulj),static_cast<int>(lrj),band_opt[iband],minValue,maxValue);
1005  // }
1006  // else
1007  // imgReader.getMinMax(minValue,maxValue,band_opt[iband],true);
1008  // if(minmax_opt[0])
1009  // std::cout << "-min " << minValue << " -max " << maxValue << " ";
1010  // else{
1011  // if(min_opt[0])
1012  // std::cout << "-min " << minValue << " ";
1013  // if(max_opt[0])
1014  // std::cout << "-max " << maxValue << " ";
1015  // }
1016  // }
1017  // }
1018  // if(relative_opt[0])
1019  // hist_opt[0]=true;
1020  // if(hist_opt[0]){
1021  // assert(band_opt[0]<imgReader.nrOfBand());
1022  // unsigned int nbin=(nbin_opt.size())? nbin_opt[0]:0;
1023  // std::vector<unsigned long int> output;
1024  // minValue=0;
1025  // maxValue=0;
1026  // //todo: optimize such that getMinMax is only called once...
1027  // imgReader.getMinMax(minValue,maxValue,band_opt[0]);
1028 
1029  // if(src_min_opt.size())
1030  // minValue=src_min_opt[0];
1031  // if(src_max_opt.size())
1032  // maxValue=src_max_opt[0];
1033  // unsigned long int nsample=imgReader.getHistogram(output,minValue,maxValue,nbin,band_opt[0]);
1034  // std::cout.precision(10);
1035  // for(int bin=0;bin<nbin;++bin){
1036  // double binValue=0;
1037  // if(nbin==maxValue-minValue+1)
1038  // binValue=minValue+bin;
1039  // else
1040  // binValue=minValue+static_cast<double>(maxValue-minValue)*(bin+0.5)/nbin;
1041  // std::cout << binValue << " ";
1042  // if(relative_opt[0])
1043  // std::cout << 100.0*static_cast<double>(output[bin])/static_cast<double>(nsample) << std::endl;
1044  // else
1045  // std::cout << static_cast<double>(output[bin]) << std::endl;
1046  // }
1047  // }