00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef CHAR_SAMP_H
00029 #define CHAR_SAMP_H
00030
00031 #include <stdlib.h>
00032 #include <stdio.h>
00033 #include <string>
00034 #include "bmp_8.h"
00035 #include "string_32.h"
00036
00037 namespace tesseract {
00038
00039 class CharSamp : public Bmp8 {
00040 public:
00041 CharSamp();
00042 CharSamp(int wid, int hgt);
00043 CharSamp(int left, int top, int wid, int hgt);
00044 ~CharSamp();
00045
00046 unsigned short Left() const { return left_; }
00047 unsigned short Right() const { return left_ + wid_; }
00048 unsigned short Top() const { return top_; }
00049 unsigned short Bottom() const { return top_ + hgt_; }
00050 unsigned short Page() const { return page_; }
00051 unsigned short NormTop() const { return norm_top_; }
00052 unsigned short NormBottom() const { return norm_bottom_; }
00053 unsigned short NormAspectRatio() const { return norm_aspect_ratio_; }
00054 unsigned short FirstChar() const { return first_char_; }
00055 unsigned short LastChar() const { return last_char_; }
00056 char_32 Label() const {
00057 if (label32_ == NULL || LabelLen() != 1) {
00058 return 0;
00059 }
00060 return label32_[0];
00061 }
00062 char_32 * StrLabel() const { return label32_; }
00063 string stringLabel() const;
00064
00065 void SetLeft(unsigned short left) { left_ = left; }
00066 void SetTop(unsigned short top) { top_ = top; }
00067 void SetPage(unsigned short page) { page_ = page; }
00068 void SetLabel(char_32 label) {
00069 if (label32_ != NULL) {
00070 delete []label32_;
00071 }
00072 label32_ = new char_32[2];
00073 if (label32_ != NULL) {
00074 label32_[0] = label;
00075 label32_[1] = 0;
00076 }
00077 }
00078 void SetLabel(const char_32 *label32) {
00079 if (label32_ != NULL) {
00080 delete []label32_;
00081 label32_ = NULL;
00082 }
00083 if (label32 != NULL) {
00084
00085 if (label32[0] == 0xfeff) {
00086 label32++;
00087 }
00088 int len = LabelLen(label32);
00089 label32_ = new char_32[len + 1];
00090 if (label32_ != NULL) {
00091 memcpy(label32_, label32, len * sizeof(*label32));
00092 label32_[len] = 0;
00093 }
00094 }
00095 }
00096 void SetLabel(string str);
00097 void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; }
00098 void SetNormBottom(unsigned short norm_bottom) {
00099 norm_bottom_ = norm_bottom;
00100 }
00101 void SetNormAspectRatio(unsigned short norm_aspect_ratio) {
00102 norm_aspect_ratio_ = norm_aspect_ratio;
00103 }
00104 void SetFirstChar(unsigned short first_char) {
00105 first_char_ = first_char;
00106 }
00107 void SetLastChar(unsigned short last_char) {
00108 last_char_ = last_char;
00109 }
00110
00111
00112 bool Save2CharDumpFile(FILE *fp) const;
00113
00114
00115
00116
00117 CharSamp *Crop();
00118
00119 ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd,
00120 int min_con_comp_size) const;
00121
00122
00123 CharSamp *Scale(int wid, int hgt, bool isotropic = true);
00124
00125 CharSamp *Clone() const;
00126
00127 bool ComputeFeatures(int conv_grid_size, float *features);
00128
00129 static CharSamp *FromCharDumpFile(CachedFile *fp);
00130 static CharSamp *FromCharDumpFile(FILE *fp);
00131 static CharSamp *FromCharDumpFile(unsigned char **raw_data);
00132 static CharSamp *FromRawData(int left, int top, int wid, int hgt,
00133 unsigned char *data);
00134 static CharSamp *FromConComps(ConComp **concomp_array,
00135 int strt_concomp, int seg_flags_size,
00136 int *seg_flags, bool *left_most,
00137 bool *right_most, int word_hgt);
00138 static int AuxFeatureCnt() { return (5); }
00139
00140 int LabelLen() const { return LabelLen(label32_); }
00141 static int LabelLen(const char_32 *label32) {
00142 if (label32 == NULL) {
00143 return 0;
00144 }
00145 int len = 0;
00146 while (label32[++len] != 0);
00147 return len;
00148 }
00149 private:
00150 char_32 * label32_;
00151 unsigned short page_;
00152 unsigned short left_;
00153 unsigned short top_;
00154
00155 unsigned short norm_top_;
00156
00157 unsigned short norm_bottom_;
00158
00159 unsigned short norm_aspect_ratio_;
00160 unsigned short first_char_;
00161 unsigned short last_char_;
00162 };
00163
00164 }
00165
00166 #endif // CHAR_SAMP_H