tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccutil/strngs.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        strngs.h  (Formerly strings.h)
00003  * Description: STRING class definition.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Fri Feb 15 09:15:01 GMT 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef           STRNGS_H
00021 #define           STRNGS_H
00022 
00023 #include          <stdio.h>
00024 #include          <string.h>
00025 #include          "platform.h"
00026 #include          "memry.h"
00027 
00028 // STRING_IS_PROTECTED != 0 would make string[index] = X invalid, because
00029 // every modification would have to go through the STRING interface.
00030 // That would let a STRING ensure its internal integrity and maintain
00031 // its own string length. Unfortunately this is not possible, because
00032 // STRINGs are used as direct-manipulation data buffers for things
00033 // like length arrays, and many places cast away the const on string()
00034 // to mutate the string. With protection turned off (0, as defined
00035 // below), the class cannot internally assume that it knows the strlen.
00036 #define STRING_IS_PROTECTED  0
00037 
00038 template <typename T> class GenericVector;  // forward declaration; used as GenericVector<STRING>* by STRING::split()
00039 
00040 class TESS_API STRING  // String class whose only data member is a pointer to a header-plus-characters buffer.
00041 {
00042   public:
00043     STRING();
00044     STRING(const STRING &string);
00045     STRING(const char *string);
00046     ~STRING ();
00047 
00048     // Writes to the given file. Returns false in case of error.
00049     bool Serialize(FILE* fp) const;
00050     // Reads from the given file. Returns false in case of error.
00051     // If swap is true, assumes a big/little-endian swap is needed.
00052     bool DeSerialize(bool swap, FILE* fp);
00053 
00054     BOOL8 contains(const char c) const;  // presumably tests whether c occurs in the string -- confirm against the definition
00055     inT32 length() const;  // defined out of line; presumably the strlen of the contents (see used_ below) -- confirm
00056     inT32 size() const { return length(); }  // alias: forwards to length()
00057     const char *string() const;  // NOTE(review): InvariantOk() compares this to NULL when used_ == 0, so NULL appears possible
00058     const char *c_str() const;  // defined out of line; its relation to string() is not visible in this header
00059 
00060     inline char* strdup() const {  // returns a copy in a buffer allocated with new char[]; release it with delete[]
00061      inT32 len = length() + 1;  // one extra char beyond length(), for the '\0' terminator
00062      return strncpy(new char[len], GetCStr(), len);  // strncpy returns its destination, i.e. the new buffer
00063     }
00064 
00065 #if STRING_IS_PROTECTED
00066     const char &operator[] (inT32 index) const;
00067     // len is the number of chars of s to insert, starting at index in this string
00068     void insert_range(inT32 index, const char*s, int len);
00069     void erase_range(inT32 index, int len);
00070 #else
00071     char &operator[] (inT32 index) const;  // unprotected mode: hands out a mutable char& even from a const STRING
00072 #endif
00073     void split(const char c, GenericVector<STRING> *splited);  // presumably splits on c into *splited -- confirm against the definition
00074     void truncate_at(inT32 index);
00075 
00076     BOOL8 operator== (const STRING & string) const;
00077     BOOL8 operator!= (const STRING & string) const;
00078     BOOL8 operator!= (const char *string) const;
00079 
00080     STRING & operator= (const char *string);
00081     STRING & operator= (const STRING & string);
00082 
00083     STRING operator+ (const STRING & string) const;
00084     STRING operator+ (const char ch) const;
00085 
00086     STRING & operator+= (const char *string);
00087     STRING & operator+= (const STRING & string);
00088     STRING & operator+= (const char ch);
00089 
00090     // Assignment for strings which are not null-terminated.
00091     void assign(const char *cstr, int len);
00092 
00093     // Appends the given string and int (as a %d) to this.
00094     // += cannot be used for ints as there is a char += operator that would
00095     // be ambiguous, and ints usually need a string before or between them
00096     // anyway.
00097     void add_str_int(const char* str, int number);
00098     // Appends the given string and double (as a %.8g) to this.
00099     void add_str_double(const char* str, double number);
00100 
00101     // Ensures capacity via ensure_cstr(), dropping the returned pointer so the buffer stays encapsulated.
00102     inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }
00103 
00104   private:
00105     typedef struct STRING_HEADER {
00106       // How much space was allocated in the string buffer for char data.
00107       int capacity_;
00108 
00109       // used_ is how much of the capacity is currently being used,
00110       // including a '\0' terminator.
00111       //
00112       // If used_ is 0 then string is NULL (not even the '\0')
00113       // else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
00114       // else (used_ < 0) strlen is >= 0 (not NULL) but needs to be computed.
00115       //      This condition is set when encapsulation is violated because
00116       //      an API returned a mutable string.
00117       //
00118       // capacity_ - used_ = excess capacity that the string can grow
00119       //                     without reallocating
00120       mutable int used_;  // mutable so that const members such as FixHeader() can update it
00121     } STRING_HEADER;
00122 
00123     // To preserve the behavior of the old serialization, we only have space
00124     // for one pointer in this structure. So we are embedding a data structure
00125     // at the start of the storage that will hold additional state variables,
00126     // then storing the actual string contents immediately after.
00127     STRING_HEADER* data_;
00128 
00129     // Returns the header part of the storage, which is data_ itself.
00130     inline STRING_HEADER* GetHeader() {
00131       return data_;
00132     }
00133     inline const STRING_HEADER* GetHeader() const {
00134       return data_;
00135     }
00136 
00137     // Returns the string data part of the storage: sizeof(STRING_HEADER) bytes past data_.
00138     inline char* GetCStr() {
00139       return ((char *)data_) + sizeof(STRING_HEADER);
00140     };
00141 
00142     inline const char* GetCStr() const {
00143       return ((const char *)data_) + sizeof(STRING_HEADER);
00144     };
00145     inline bool InvariantOk() const {  // checks used_ against string(); unconditionally true when STRING_IS_PROTECTED is 0
00146 #if STRING_IS_PROTECTED
00147       return (GetHeader()->used_ == 0) ?
00148         (string() == NULL) : (GetHeader()->used_ == (strlen(string()) + 1));
00149 #else
00150       return true;
00151 #endif
00152     }
00153 
00154     // Ensure string has requested capacity as optimization
00155     // to avoid unnecessary reallocations.
00156     // The return value is a cstr buffer with at least the requested capacity.
00157     char* ensure_cstr(inT32 min_capacity);
00158 
00159     void FixHeader() const;  // make used_ non-negative, even if const (used_ is declared mutable)
00160 
00161     char* AllocData(int used, int capacity);  // presumably allocates the header-plus-chars storage -- confirm against the definition
00162     void DiscardData();  // presumably releases the storage behind data_ -- confirm against the definition
00163 };
00164 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines