tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccutil/bitvector.h
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00004 // File:        bitvector.h
00005 // Description: Class replacement for BITVECTOR.
00006 // Author:      Ray Smith
00007 // Created:     Mon Jan 10 17:44:01 PST 2011
00008 //
00009 // (C) Copyright 2011, Google Inc.
00010 // Licensed under the Apache License, Version 2.0 (the "License");
00011 // you may not use this file except in compliance with the License.
00012 // You may obtain a copy of the License at
00013 // http://www.apache.org/licenses/LICENSE-2.0
00014 // Unless required by applicable law or agreed to in writing, software
00015 // distributed under the License is distributed on an "AS IS" BASIS,
00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00017 // See the License for the specific language governing permissions and
00018 // limitations under the License.
00019 //
00021 
00022 
00023 #ifndef TESSERACT_CCUTIL_BITVECTOR_H__
00024 #define TESSERACT_CCUTIL_BITVECTOR_H__
00025 
00026 #include <assert.h>
00027 #include <stdio.h>
00028 #include "host.h"
00029 
00030 namespace tesseract {
00031 
00032 // Trivial class to encapsulate a fixed-length array of bits, with
00033 // Serialize/DeSerialize. Replaces the old macros.
00034 class BitVector {
00035  public:
00036   // Fast lookup table to get the first least significant set bit in a byte.
00037   // For zero, the table has 255, but since it is a special case, most code
00038   // that uses this table will check for zero before looking up lsb_index_.
00039   static const uinT8 lsb_index_[256];
00040   // Fast lookup table to get the residual bits after zeroing the least
00041   // significant set bit in a byte.
00042   static const uinT8 lsb_eroded_[256];
00043   // Fast lookup table to give the number of set bits in a byte.
00044   static const int hamming_table_[256];
00045 
00046   BitVector();
00047   // Initializes the array to length * false.
00048   explicit BitVector(int length);
00049   BitVector(const BitVector& src);
00050   BitVector& operator=(const BitVector& src);
00051   ~BitVector();
00052 
00053   // Initializes the array to length * false.
00054   void Init(int length);
00055 
00056   // Returns the number of bits that are accessible in the vector.
00057   int size() const {
00058     return bit_size_;
00059   }
00060 
00061   // Writes to the given file. Returns false in case of error.
00062   bool Serialize(FILE* fp) const;
00063   // Reads from the given file. Returns false in case of error.
00064   // If swap is true, assumes a big/little-endian swap is needed.
00065   bool DeSerialize(bool swap, FILE* fp);
00066 
00067   void SetAllFalse();
00068   void SetAllTrue();
00069 
00070   // Accessors to set/reset/get bits.
00071   // The range of index is [0, size()-1].
00072   // There is debug-only bounds checking.
00073   void SetBit(int index) {
00074     array_[WordIndex(index)] |= BitMask(index);
00075   }
00076   void ResetBit(int index) {
00077     array_[WordIndex(index)] &= ~BitMask(index);
00078   }
00079   void SetValue(int index, bool value) {
00080     if (value)
00081       SetBit(index);
00082     else
00083       ResetBit(index);
00084   }
00085   bool At(int index) const {
00086     return (array_[WordIndex(index)] & BitMask(index)) != 0;
00087   }
00088   bool operator[](int index) const {
00089     return (array_[WordIndex(index)] & BitMask(index)) != 0;
00090   }
00091 
00092   // Returns the index of the next set bit after the given index.
00093   // Useful for quickly iterating through the set bits in a sparse vector.
00094   int NextSetBit(int prev_bit) const;
00095 
00096   // Returns the number of set bits in the vector.
00097   int NumSetBits() const;
00098 
00099   // Logical in-place operations on whole bit vectors. Tries to do something
00100   // sensible if they aren't the same size, but they should be really.
00101   void operator|=(const BitVector& other);
00102   void operator&=(const BitVector& other);
00103   void operator^=(const BitVector& other);
00104   // Set subtraction *this = v1 - v2.
00105   void SetSubtract(const BitVector& v1, const BitVector& v2);
00106 
00107  private:
00108   // Allocates memory for a vector of the given length.
00109   void Alloc(int length);
00110 
00111   // Computes the index to array_ for the given index, with debug range
00112   // checking.
00113   int WordIndex(int index) const {
00114     assert(0 <= index && index < bit_size_);
00115     return index / kBitFactor;
00116   }
00117   // Returns a mask to select the appropriate bit for the given index.
00118   uinT32 BitMask(int index) const {
00119     return 1 << (index & (kBitFactor - 1));
00120   }
00121   // Returns the number of array elements needed to represent the current
00122   // bit_size_.
00123   int WordLength() const {
00124     return (bit_size_ + kBitFactor - 1) / kBitFactor;
00125   }
00126   // Returns the number of bytes consumed by the array_.
00127   int ByteLength() const {
00128     return WordLength() * sizeof(*array_);
00129   }
00130 
00131   // Number of bits in this BitVector.
00132   inT32 bit_size_;
00133   // Array of words used to pack the bits.
00134   // Bits are stored little-endian by uinT32 word, ie by word first and then
00135   // starting with the least significant bit in each word.
00136   uinT32* array_;
00137   // Number of bits in an array_ element.
00138   static const int kBitFactor = sizeof(uinT32) * 8;
00139 };
00140 
00141 }  // namespace tesseract.
00142 
00143 #endif  // TESSERACT_CCUTIL_BITVECTOR_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines