tesseract
3.03
|
00001 /********************************************************************** 00002 * File: linlsq.h (Formerly llsq.h) 00003 * Description: Linear Least squares fitting code. 00004 * Author: Ray Smith 00005 * Created: Thu Sep 12 08:44:51 BST 1991 00006 * 00007 * (C) Copyright 1991, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifndef TESSERACT_CCSTRUCT_LINLSQ_H_ 00021 #define TESSERACT_CCSTRUCT_LINLSQ_H_ 00022 00023 #include "points.h" 00024 #include "params.h" 00025 00026 class LLSQ { 00027 public: 00028 LLSQ() { // constructor 00029 clear(); // set to zeros 00030 } 00031 void clear(); // initialize 00032 00033 // Adds an element with a weight of 1. 00034 void add(double x, double y); 00035 // Adds an element with a specified weight. 00036 void add(double x, double y, double weight); 00037 // Adds a whole LLSQ. 00038 void add(const LLSQ& other); 00039 // Deletes an element with a weight of 1. 00040 void remove(double x, double y); 00041 inT32 count() const { // no of elements 00042 return static_cast<int>(total_weight + 0.5); 00043 } 00044 00045 double m() const; // get gradient 00046 double c(double m) const; // get constant 00047 double rms(double m, double c) const; // get error 00048 double pearson() const; // get correlation coefficient. 00049 00050 // Returns the x,y means as an FCOORD. 00051 FCOORD mean_point() const; 00052 00053 // Returns the average sum of squared perpendicular error from a line 00054 // through mean_point() in the direction dir. 00055 double rms_orth(const FCOORD &dir) const; 00056 00057 // Returns the direction of the fitted line as a unit vector, using the 00058 // least mean squared perpendicular distance. The line runs through the 00059 // mean_point, i.e. a point p on the line is given by: 00060 // p = mean_point() + lambda * vector_fit() for some real number lambda. 00061 // Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous 00062 // and may be negated without changing its meaning, since a line is only 00063 // unique to a range of pi radians. 00064 // Modernists prefer to think of this as an Eigenvalue problem, but 00065 // Pearson had the simple solution in 1901. 00066 // 00067 // Note that this is equivalent to returning the Principal Component in PCA, 00068 // or the eigenvector corresponding to the largest eigenvalue in the 00069 // covariance matrix. 00070 FCOORD vector_fit() const; 00071 00072 // Returns the covariance. 00073 double covariance() const { 00074 if (total_weight > 0.0) 00075 return (sigxy - sigx * sigy / total_weight) / total_weight; 00076 else 00077 return 0.0; 00078 } 00079 double x_variance() const { 00080 if (total_weight > 0.0) 00081 return (sigxx - sigx * sigx / total_weight) / total_weight; 00082 else 00083 return 0.0; 00084 } 00085 double y_variance() const { 00086 if (total_weight > 0.0) 00087 return (sigyy - sigy * sigy / total_weight) / total_weight; 00088 else 00089 return 0.0; 00090 } 00091 00092 private: 00093 double total_weight; // no of elements or sum of weights. 00094 double sigx; // sum of x 00095 double sigy; // sum of y 00096 double sigxx; // sum x squared 00097 double sigxy; // sum of xy 00098 double sigyy; // sum y squared 00099 }; 00100 00101 00102 // Returns the median value of the vector, given that the values are 00103 // circular, with the given modulus. Values may be signed or unsigned, 00104 // eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi). 00105 // NOTE that the array is shuffled, but the time taken is linear. 00106 // An assumption is made that most of the values are spread over no more than 00107 // half the range, but wrap-around is accounted for if the median is near 00108 // the wrap-around point. 00109 // Cannot be a member of GenericVector, as it makes heavy used of LLSQ. 00110 // T must be an integer or float/double type. 00111 template<typename T> T MedianOfCircularValues(T modulus, GenericVector<T>* v) { 00112 LLSQ stats; 00113 T halfrange = static_cast<T>(modulus / 2); 00114 int num_elements = v->size(); 00115 for (int i = 0; i < num_elements; ++i) { 00116 stats.add((*v)[i], (*v)[i] + halfrange); 00117 } 00118 bool offset_needed = stats.y_variance() < stats.x_variance(); 00119 if (offset_needed) { 00120 for (int i = 0; i < num_elements; ++i) { 00121 (*v)[i] += halfrange; 00122 } 00123 } 00124 int median_index = v->choose_nth_item(num_elements / 2); 00125 if (offset_needed) { 00126 for (int i = 0; i < num_elements; ++i) { 00127 (*v)[i] -= halfrange; 00128 } 00129 } 00130 return (*v)[median_index]; 00131 } 00132 00133 00134 #endif // TESSERACT_CCSTRUCT_LINLSQ_H_