tesseract
3.03
|
00001 // Copyright 2011 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00004 // File: bitvector.h 00005 // Description: Class replacement for BITVECTOR. 00006 // Author: Ray Smith 00007 // Created: Mon Jan 10 17:44:01 PST 2011 00008 // 00009 // (C) Copyright 2011, Google Inc. 00010 // Licensed under the Apache License, Version 2.0 (the "License"); 00011 // you may not use this file except in compliance with the License. 00012 // You may obtain a copy of the License at 00013 // http://www.apache.org/licenses/LICENSE-2.0 00014 // Unless required by applicable law or agreed to in writing, software 00015 // distributed under the License is distributed on an "AS IS" BASIS, 00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 // See the License for the specific language governing permissions and 00018 // limitations under the License. 00019 // 00021 00022 00023 #ifndef TESSERACT_CCUTIL_BITVECTOR_H__ 00024 #define TESSERACT_CCUTIL_BITVECTOR_H__ 00025 00026 #include <assert.h> 00027 #include <stdio.h> 00028 #include "host.h" 00029 00030 namespace tesseract { 00031 00032 // Trivial class to encapsulate a fixed-length array of bits, with 00033 // Serialize/DeSerialize. Replaces the old macros. 00034 class BitVector { 00035 public: 00036 // Fast lookup table to get the first least significant set bit in a byte. 00037 // For zero, the table has 255, but since it is a special case, most code 00038 // that uses this table will check for zero before looking up lsb_index_. 00039 static const uinT8 lsb_index_[256]; 00040 // Fast lookup table to get the residual bits after zeroing the least 00041 // significant set bit in a byte. 00042 static const uinT8 lsb_eroded_[256]; 00043 // Fast lookup table to give the number of set bits in a byte. 00044 static const int hamming_table_[256]; 00045 00046 BitVector(); 00047 // Initializes the array to length * false. 00048 explicit BitVector(int length); 00049 BitVector(const BitVector& src); 00050 BitVector& operator=(const BitVector& src); 00051 ~BitVector(); 00052 00053 // Initializes the array to length * false. 00054 void Init(int length); 00055 00056 // Returns the number of bits that are accessible in the vector. 00057 int size() const { 00058 return bit_size_; 00059 } 00060 00061 // Writes to the given file. Returns false in case of error. 00062 bool Serialize(FILE* fp) const; 00063 // Reads from the given file. Returns false in case of error. 00064 // If swap is true, assumes a big/little-endian swap is needed. 00065 bool DeSerialize(bool swap, FILE* fp); 00066 00067 void SetAllFalse(); 00068 void SetAllTrue(); 00069 00070 // Accessors to set/reset/get bits. 00071 // The range of index is [0, size()-1]. 00072 // There is debug-only bounds checking. 00073 void SetBit(int index) { 00074 array_[WordIndex(index)] |= BitMask(index); 00075 } 00076 void ResetBit(int index) { 00077 array_[WordIndex(index)] &= ~BitMask(index); 00078 } 00079 void SetValue(int index, bool value) { 00080 if (value) 00081 SetBit(index); 00082 else 00083 ResetBit(index); 00084 } 00085 bool At(int index) const { 00086 return (array_[WordIndex(index)] & BitMask(index)) != 0; 00087 } 00088 bool operator[](int index) const { 00089 return (array_[WordIndex(index)] & BitMask(index)) != 0; 00090 } 00091 00092 // Returns the index of the next set bit after the given index. 00093 // Useful for quickly iterating through the set bits in a sparse vector. 00094 int NextSetBit(int prev_bit) const; 00095 00096 // Returns the number of set bits in the vector. 00097 int NumSetBits() const; 00098 00099 // Logical in-place operations on whole bit vectors. Tries to do something 00100 // sensible if they aren't the same size, but they should be really. 00101 void operator|=(const BitVector& other); 00102 void operator&=(const BitVector& other); 00103 void operator^=(const BitVector& other); 00104 // Set subtraction *this = v1 - v2. 00105 void SetSubtract(const BitVector& v1, const BitVector& v2); 00106 00107 private: 00108 // Allocates memory for a vector of the given length. 00109 void Alloc(int length); 00110 00111 // Computes the index to array_ for the given index, with debug range 00112 // checking. 00113 int WordIndex(int index) const { 00114 assert(0 <= index && index < bit_size_); 00115 return index / kBitFactor; 00116 } 00117 // Returns a mask to select the appropriate bit for the given index. 00118 uinT32 BitMask(int index) const { 00119 return 1 << (index & (kBitFactor - 1)); 00120 } 00121 // Returns the number of array elements needed to represent the current 00122 // bit_size_. 00123 int WordLength() const { 00124 return (bit_size_ + kBitFactor - 1) / kBitFactor; 00125 } 00126 // Returns the number of bytes consumed by the array_. 00127 int ByteLength() const { 00128 return WordLength() * sizeof(*array_); 00129 } 00130 00131 // Number of bits in this BitVector. 00132 inT32 bit_size_; 00133 // Array of words used to pack the bits. 00134 // Bits are stored little-endian by uinT32 word, ie by word first and then 00135 // starting with the least significant bit in each word. 00136 uinT32* array_; 00137 // Number of bits in an array_ element. 00138 static const int kBitFactor = sizeof(uinT32) * 8; 00139 }; 00140 00141 } // namespace tesseract. 00142 00143 #endif // TESSERACT_CCUTIL_BITVECTOR_H__