tesseract
3.03
|
00001 /********************************************************************** 00002 * File: strngs.c (Formerly strings.c) 00003 * Description: STRING class functions. 00004 * Author: Ray Smith 00005 * Created: Fri Feb 15 09:13:30 GMT 1991 00006 * 00007 * (C) Copyright 1991, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "helpers.h" 00021 #include "tprintf.h" 00022 #include "strngs.h" 00023 #include "genericvector.h" 00024 00025 #include <assert.h> 00026 // Size of buffer needed to host the decimal representation of the maximum 00027 // possible length of an int (in 64 bits), being -<20 digits>. 00028 const int kMaxIntSize = 22; 00029 // Size of buffer needed to host the decimal representation of the maximum 00030 // possible length of a %.8g being -0.12345678e+999<nul> = 15. 00031 const int kMaxDoubleSize = 15; 00032 00033 /********************************************************************** 00034 * STRING_HEADER provides metadata about the allocated buffer, 00035 * including total capacity and how much used (strlen with '\0'). 00036 * 00037 * The implementation hides this header at the start of the data 00038 * buffer and appends the string on the end to keep sizeof(STRING) 00039 * unchanged from earlier versions so serialization is not affected. 00040 * 00041 * The collection of MACROS provide different implementations depending 00042 * on whether the string keeps track of its strlen or not so that this 00043 * feature can be added in later when consumers dont modifify the string 00044 **********************************************************************/ 00045 00046 // Smallest string to allocate by default 00047 const int kMinCapacity = 16; 00048 00049 char* STRING::AllocData(int used, int capacity) { 00050 data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER)); 00051 00052 // header is the metadata for this memory block 00053 STRING_HEADER* header = GetHeader(); 00054 header->capacity_ = capacity; 00055 header->used_ = used; 00056 return GetCStr(); 00057 } 00058 00059 void STRING::DiscardData() { 00060 free_string((char *)data_); 00061 } 00062 00063 // This is a private method; ensure FixHeader is called (or used_ is well defined) 00064 // beforehand 00065 char* STRING::ensure_cstr(inT32 min_capacity) { 00066 STRING_HEADER* orig_header = GetHeader(); 00067 if (min_capacity <= orig_header->capacity_) 00068 return ((char *)this->data_) + sizeof(STRING_HEADER); 00069 00070 // if we are going to grow bigger, than double our existing 00071 // size, but if that still is not big enough then keep the 00072 // requested capacity 00073 if (min_capacity < 2 * orig_header->capacity_) 00074 min_capacity = 2 * orig_header->capacity_; 00075 00076 int alloc = sizeof(STRING_HEADER) + min_capacity; 00077 STRING_HEADER* new_header = (STRING_HEADER*)(alloc_string(alloc)); 00078 00079 memcpy(&new_header[1], GetCStr(), orig_header->used_); 00080 new_header->capacity_ = min_capacity; 00081 new_header->used_ = orig_header->used_; 00082 00083 // free old memory, then rebind to new memory 00084 DiscardData(); 00085 data_ = new_header; 00086 00087 assert(InvariantOk()); 00088 return ((char *)data_) + sizeof(STRING_HEADER); 00089 } 00090 00091 // This is const, but is modifying a mutable field 00092 // this way it can be used on const or non-const instances. 00093 void STRING::FixHeader() const { 00094 const STRING_HEADER* header = GetHeader(); 00095 if (header->used_ < 0) 00096 header->used_ = strlen(GetCStr()) + 1; 00097 } 00098 00099 00100 STRING::STRING() { 00101 // Empty STRINGs contain just the "\0". 00102 memcpy(AllocData(1, kMinCapacity), "", 1); 00103 } 00104 00105 STRING::STRING(const STRING& str) { 00106 str.FixHeader(); 00107 const STRING_HEADER* str_header = str.GetHeader(); 00108 int str_used = str_header->used_; 00109 char *this_cstr = AllocData(str_used, str_used); 00110 memcpy(this_cstr, str.GetCStr(), str_used); 00111 assert(InvariantOk()); 00112 } 00113 00114 STRING::STRING(const char* cstr) { 00115 if (cstr == NULL) { 00116 // Empty STRINGs contain just the "\0". 00117 memcpy(AllocData(1, kMinCapacity), "", 1); 00118 } else { 00119 int len = strlen(cstr) + 1; 00120 char* this_cstr = AllocData(len, len); 00121 memcpy(this_cstr, cstr, len); 00122 } 00123 assert(InvariantOk()); 00124 } 00125 00126 STRING::~STRING() { 00127 DiscardData(); 00128 } 00129 00130 // Writes to the given file. Returns false in case of error. 00131 bool STRING::Serialize(FILE* fp) const { 00132 inT32 len = length(); 00133 if (fwrite(&len, sizeof(len), 1, fp) != 1) return false; 00134 if (static_cast<int>(fwrite(GetCStr(), 1, len, fp)) != len) return false; 00135 return true; 00136 } 00137 // Reads from the given file. Returns false in case of error. 00138 // If swap is true, assumes a big/little-endian swap is needed. 00139 bool STRING::DeSerialize(bool swap, FILE* fp) { 00140 inT32 len; 00141 if (fread(&len, sizeof(len), 1, fp) != 1) return false; 00142 if (swap) 00143 ReverseN(&len, sizeof(len)); 00144 truncate_at(len); 00145 if (static_cast<int>(fread(GetCStr(), 1, len, fp)) != len) return false; 00146 return true; 00147 } 00148 00149 BOOL8 STRING::contains(const char c) const { 00150 return (c != '\0') && (strchr (GetCStr(), c) != NULL); 00151 } 00152 00153 inT32 STRING::length() const { 00154 FixHeader(); 00155 return GetHeader()->used_ - 1; 00156 } 00157 00158 const char* STRING::string() const { 00159 const STRING_HEADER* header = GetHeader(); 00160 if (header->used_ == 0) 00161 return NULL; 00162 00163 // mark header length unreliable because tesseract might 00164 // cast away the const and mutate the string directly. 00165 header->used_ = -1; 00166 return GetCStr(); 00167 } 00168 00169 const char* STRING::c_str() const { 00170 return string(); 00171 } 00172 00173 /****** 00174 * The STRING_IS_PROTECTED interface adds additional support to migrate 00175 * code that needs to modify the STRING in ways not otherwise supported 00176 * without violating encapsulation. 00177 * 00178 * Also makes the [] operator return a const so it is immutable 00179 */ 00180 #if STRING_IS_PROTECTED 00181 const char& STRING::operator[](inT32 index) const { 00182 return GetCStr()[index]; 00183 } 00184 00185 void STRING::insert_range(inT32 index, const char* str, int len) { 00186 // if index is outside current range, then also grow size of string 00187 // to accmodate the requested range. 00188 STRING_HEADER* this_header = GetHeader(); 00189 int used = this_header->used_; 00190 if (index > used) 00191 used = index; 00192 00193 char* this_cstr = ensure_cstr(used + len + 1); 00194 if (index < used) { 00195 // move existing string from index to '\0' inclusive. 00196 memmove(this_cstr + index + len, 00197 this_cstr + index, 00198 this_header->used_ - index); 00199 } else if (len > 0) { 00200 // We are going to overwrite previous null terminator, so write the new one. 00201 this_cstr[this_header->used_ + len - 1] = '\0'; 00202 00203 // If the old header did not have the terminator, 00204 // then we need to account for it now that we've added it. 00205 // Otherwise it was already accounted for; we just moved it. 00206 if (this_header->used_ == 0) 00207 ++this_header->used_; 00208 } 00209 00210 // Write new string to index. 00211 // The string is already terminated from the conditions above. 00212 memcpy(this_cstr + index, str, len); 00213 this_header->used_ += len; 00214 00215 assert(InvariantOk()); 00216 } 00217 00218 void STRING::erase_range(inT32 index, int len) { 00219 char* this_cstr = GetCStr(); 00220 STRING_HEADER* this_header = GetHeader(); 00221 00222 memcpy(this_cstr+index, this_cstr+index+len, 00223 this_header->used_ - index - len); 00224 this_header->used_ -= len; 00225 assert(InvariantOk()); 00226 } 00227 00228 #else 00229 void STRING::truncate_at(inT32 index) { 00230 ASSERT_HOST(index >= 0); 00231 FixHeader(); 00232 char* this_cstr = ensure_cstr(index + 1); 00233 this_cstr[index] = '\0'; 00234 GetHeader()->used_ = index + 1; 00235 assert(InvariantOk()); 00236 } 00237 00238 char& STRING::operator[](inT32 index) const { 00239 // Code is casting away this const and mutating the string, 00240 // so mark used_ as -1 to flag it unreliable. 00241 GetHeader()->used_ = -1; 00242 return ((char *)GetCStr())[index]; 00243 } 00244 #endif 00245 00246 void STRING::split(const char c, GenericVector<STRING> *splited) { 00247 int start_index = 0; 00248 for (int i = 0; i < length(); i++) { 00249 if ((*this)[i] == c) { 00250 if (i != start_index) { 00251 (*this)[i] = '\0'; 00252 STRING tmp = GetCStr() + start_index; 00253 splited->push_back(tmp); 00254 (*this)[i] = c; 00255 } 00256 start_index = i + 1; 00257 } 00258 } 00259 00260 if (length() != start_index) { 00261 STRING tmp = GetCStr() + start_index; 00262 splited->push_back(tmp); 00263 } 00264 } 00265 00266 BOOL8 STRING::operator==(const STRING& str) const { 00267 FixHeader(); 00268 str.FixHeader(); 00269 const STRING_HEADER* str_header = str.GetHeader(); 00270 const STRING_HEADER* this_header = GetHeader(); 00271 int this_used = this_header->used_; 00272 int str_used = str_header->used_; 00273 00274 return (this_used == str_used) 00275 && (memcmp(GetCStr(), str.GetCStr(), this_used) == 0); 00276 } 00277 00278 BOOL8 STRING::operator!=(const STRING& str) const { 00279 FixHeader(); 00280 str.FixHeader(); 00281 const STRING_HEADER* str_header = str.GetHeader(); 00282 const STRING_HEADER* this_header = GetHeader(); 00283 int this_used = this_header->used_; 00284 int str_used = str_header->used_; 00285 00286 return (this_used != str_used) 00287 || (memcmp(GetCStr(), str.GetCStr(), this_used) != 0); 00288 } 00289 00290 BOOL8 STRING::operator!=(const char* cstr) const { 00291 FixHeader(); 00292 const STRING_HEADER* this_header = GetHeader(); 00293 00294 if (cstr == NULL) 00295 return this_header->used_ > 1; // either '\0' or NULL 00296 else { 00297 inT32 length = strlen(cstr) + 1; 00298 return (this_header->used_ != length) 00299 || (memcmp(GetCStr(), cstr, length) != 0); 00300 } 00301 } 00302 00303 STRING& STRING::operator=(const STRING& str) { 00304 str.FixHeader(); 00305 const STRING_HEADER* str_header = str.GetHeader(); 00306 int str_used = str_header->used_; 00307 00308 GetHeader()->used_ = 0; // clear since ensure doesnt need to copy data 00309 char* this_cstr = ensure_cstr(str_used); 00310 STRING_HEADER* this_header = GetHeader(); 00311 00312 memcpy(this_cstr, str.GetCStr(), str_used); 00313 this_header->used_ = str_used; 00314 00315 assert(InvariantOk()); 00316 return *this; 00317 } 00318 00319 STRING & STRING::operator+=(const STRING& str) { 00320 FixHeader(); 00321 str.FixHeader(); 00322 const STRING_HEADER* str_header = str.GetHeader(); 00323 const char* str_cstr = str.GetCStr(); 00324 int str_used = str_header->used_; 00325 int this_used = GetHeader()->used_; 00326 char* this_cstr = ensure_cstr(this_used + str_used); 00327 00328 STRING_HEADER* this_header = GetHeader(); // after ensure for realloc 00329 00330 if (this_used > 1) { 00331 memcpy(this_cstr + this_used - 1, str_cstr, str_used); 00332 this_header->used_ += str_used - 1; // overwrite '\0' 00333 } else { 00334 memcpy(this_cstr, str_cstr, str_used); 00335 this_header->used_ = str_used; 00336 } 00337 00338 assert(InvariantOk()); 00339 return *this; 00340 } 00341 00342 void STRING::add_str_int(const char* str, int number) { 00343 if (str != NULL) 00344 *this += str; 00345 // Allow space for the maximum possible length of inT64. 00346 char num_buffer[kMaxIntSize]; 00347 snprintf(num_buffer, kMaxIntSize - 1, "%d", number); 00348 num_buffer[kMaxIntSize - 1] = '\0'; 00349 *this += num_buffer; 00350 } 00351 // Appends the given string and double (as a %.8g) to this. 00352 void STRING::add_str_double(const char* str, double number) { 00353 if (str != NULL) 00354 *this += str; 00355 // Allow space for the maximum possible length of %8g. 00356 char num_buffer[kMaxDoubleSize]; 00357 snprintf(num_buffer, kMaxDoubleSize - 1, "%.8g", number); 00358 num_buffer[kMaxDoubleSize - 1] = '\0'; 00359 *this += num_buffer; 00360 } 00361 00362 STRING & STRING::operator=(const char* cstr) { 00363 STRING_HEADER* this_header = GetHeader(); 00364 if (cstr) { 00365 int len = strlen(cstr) + 1; 00366 00367 this_header->used_ = 0; // dont bother copying data if need to realloc 00368 char* this_cstr = ensure_cstr(len); 00369 this_header = GetHeader(); // for realloc 00370 memcpy(this_cstr, cstr, len); 00371 this_header->used_ = len; 00372 } else { 00373 // Reallocate to same state as default constructor. 00374 DiscardData(); 00375 // Empty STRINGs contain just the "\0". 00376 memcpy(AllocData(1, kMinCapacity), "", 1); 00377 } 00378 00379 assert(InvariantOk()); 00380 return *this; 00381 } 00382 00383 void STRING::assign(const char *cstr, int len) { 00384 STRING_HEADER* this_header = GetHeader(); 00385 this_header->used_ = 0; // dont bother copying data if need to realloc 00386 char* this_cstr = ensure_cstr(len + 1); // +1 for '\0' 00387 00388 this_header = GetHeader(); // for realloc 00389 memcpy(this_cstr, cstr, len); 00390 this_cstr[len] = '\0'; 00391 this_header->used_ = len + 1; 00392 00393 assert(InvariantOk()); 00394 } 00395 00396 STRING STRING::operator+(const STRING& str) const { 00397 STRING result(*this); 00398 result += str; 00399 00400 assert(InvariantOk()); 00401 return result; 00402 } 00403 00404 00405 STRING STRING::operator+(const char ch) const { 00406 STRING result; 00407 FixHeader(); 00408 const STRING_HEADER* this_header = GetHeader(); 00409 int this_used = this_header->used_; 00410 char* result_cstr = result.ensure_cstr(this_used + 1); 00411 STRING_HEADER* result_header = result.GetHeader(); 00412 int result_used = result_header->used_; 00413 00414 // copies '\0' but we'll overwrite that 00415 memcpy(result_cstr, GetCStr(), this_used); 00416 result_cstr[result_used] = ch; // overwrite old '\0' 00417 result_cstr[result_used + 1] = '\0'; // append on '\0' 00418 ++result_header->used_; 00419 00420 assert(InvariantOk()); 00421 return result; 00422 } 00423 00424 00425 STRING& STRING::operator+=(const char *str) { 00426 if (!str || !*str) // empty string has no effect 00427 return *this; 00428 00429 FixHeader(); 00430 int len = strlen(str) + 1; 00431 int this_used = GetHeader()->used_; 00432 char* this_cstr = ensure_cstr(this_used + len); 00433 STRING_HEADER* this_header = GetHeader(); // after ensure for realloc 00434 00435 // if we had non-empty string then append overwriting old '\0' 00436 // otherwise replace 00437 if (this_used > 0) { 00438 memcpy(this_cstr + this_used - 1, str, len); 00439 this_header->used_ += len - 1; 00440 } else { 00441 memcpy(this_cstr, str, len); 00442 this_header->used_ = len; 00443 } 00444 00445 assert(InvariantOk()); 00446 return *this; 00447 } 00448 00449 00450 STRING& STRING::operator+=(const char ch) { 00451 if (ch == '\0') 00452 return *this; 00453 00454 FixHeader(); 00455 int this_used = GetHeader()->used_; 00456 char* this_cstr = ensure_cstr(this_used + 1); 00457 STRING_HEADER* this_header = GetHeader(); 00458 00459 if (this_used > 0) 00460 --this_used; // undo old empty null if there was one 00461 00462 this_cstr[this_used++] = ch; // append ch to end 00463 this_cstr[this_used++] = '\0'; // append '\0' after ch 00464 this_header->used_ = this_used; 00465 00466 assert(InvariantOk()); 00467 return *this; 00468 }