LTP GCOV extension - code coverage report
Current view: directory - ept/textsearch - textsearch.h
Test: lcov.info
Date: 2008-08-14 Instrumented lines: 15
Code covered: 86.7 % Executed lines: 13

       1                 : #ifndef EPT_TEXTSEARCH_TEXTSEARCH_H
       2                 : #define EPT_TEXTSEARCH_TEXTSEARCH_H
       3                 : 
       4                 : /** @file
       5                 :  * @author Enrico Zini <enrico@enricozini.org>
       6                 :  * Fast full-text search
       7                 :  */
       8                 : 
       9                 : /*
      10                 :  * Copyright (C) 2007  Enrico Zini <enrico@debian.org>
      11                 :  *
      12                 :  * This program is free software; you can redistribute it and/or modify
      13                 :  * it under the terms of the GNU General Public License as published by
      14                 :  * the Free Software Foundation; either version 2 of the License, or
      15                 :  * (at your option) any later version.
      16                 :  *
      17                 :  * This program is distributed in the hope that it will be useful,
      18                 :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19                 :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      20                 :  * GNU General Public License for more details.
      21                 :  *
      22                 :  * You should have received a copy of the GNU General Public License
      23                 :  * along with this program; if not, write to the Free Software
      24                 :  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      25                 :  */
      26                 : 
      27                 : #include <xapian.h>
      28                 : #include <vector>
      29                 : #include <string>
      30                 : 
      31                 : namespace ept {
      32                 : namespace apt {
      33                 : class Apt;
      34                 : class PackageRecord;
      35                 : }
      36                 : namespace debtags {
      37                 : class Debtags;
      38                 : }
      39                 : namespace textsearch {
      40                 : 
      41                 : // Allocate value indexes for known values
      42                 : const Xapian::valueno VAL_APT_INSTALLED_SIZE      =  1;
      43                 : const Xapian::valueno VAL_APT_PACKAGE_SIZE        =  2;
      44                 : const Xapian::valueno VAL_POPCON                  = 10;
      45                 : const Xapian::valueno VAL_ITERATING_RATING        = 20;
      46                 : const Xapian::valueno VAL_ITERATING_FUNCTIONALITY = 21;
      47                 : const Xapian::valueno VAL_ITERATING_USABILITY     = 22;
      48                 : const Xapian::valueno VAL_ITERATING_SECURITY      = 23;
      49                 : const Xapian::valueno VAL_ITERATING_PERFORMANCE   = 24;
      50                 : const Xapian::valueno VAL_ITERATING_QUALITY       = 25;
      51                 : const Xapian::valueno VAL_ITERATING_SUPPORT       = 26;
      52                 : const Xapian::valueno VAL_ITERATING_ADOPTION      = 27;
      53                 : // If you need to index a value and cannot edit this file, feel free to use any
      54                 : // value starting from 1000000
      55                 : 
      56                 : 
      57                 : /*
      58                 : Fall back on apt scan searches when the index is not present
      59                 : 
      60                 : Explicitly decide at instantiation (or at any other time) if a rebuild should
      61                 : be performed.  Just adding a 'rebuildIfNeeded' method would be enough.
      62                 : 
      63                 : 17:14 #xapian < enrico> Hello.  I'm finally in a position of writing a library to maintain
      64                 :                         a xapian index with Debian package descriptions in a Debian system
      65                 : 17:14 #xapian < enrico> I have a question, though
      66                 : 17:14 #xapian < enrico> The descriptions change regularly as people run 'apt-get update'
      67                 : 17:15 #xapian < enrico> I'd need to have a way to update the description index after
      68                 :                         apt-get update, without rebuilding it from scratch
      69                 : 17:15 #xapian < enrico> Is there some documentation on how to do that?  I can't exactly
      70                 :                         tell Xapian "the new description for package foo is this" because
      71                 :                         I'd need the xapian id
      72                 : 19:11 #xapian < omega> you can add a unique term with a boolean prefix?
      73                 : 19:11 #xapian < omega> like Qpackage-name
      74                 : 19:11 #xapian < omega> then you search for it and replace_document
      75                 : 19:24 #xapian < richardb> Or indeed, you use the "replace_document()" form which takes a
      76                 :                           unique_id term.
      77                 : 19:25 #xapian < richardb>         Xapian::docid replace_document(const std::string &
      78                 :                           unique_term,
      79                 : 19:25 #xapian < richardb>                                        const Xapian::Document &
      80                 :                           document);
      81                 : 19:43 #xapian < enrico> unique term
      82                 : 19:43 #xapian < enrico> nice!
      83                 : 19:44 #xapian < enrico> can I use a non-alpha prefix, like :package-name ?
      84                 : 19:45 #xapian < enrico> or pkg:package-name
      85                 : 19:45 #xapian < enrico> I suppose I can
      86                 : */
      87                 : 
      88                 : /**
      89                 :  * Maintains and accesses a Xapian index of package descriptions.
      90                 :  *
      91                 :  * Unlike Debtags and Popcon, TextSearch does not attempt to create the
      92                 :  * index in the home directory if no system index is found and it is not
      93                 :  * running as root: this is to avoid secretly building large indexes (>50MB)
      94                 :  * in the home directory of users.
      95                 :  *
      96                 :  * The idea then is to have root keep the index up to date, possibly running a
      97                 :  * reindexing tool once a day, or after an apt-get update.
      98                 :  *
      99                 :  * This works because the full text search index is useful even if it is
     100                 :  * slightly out of date.
     101                 :  */
     102                 : class TextSearch
     103              11 : {
     104                 : protected:
     105                 :         time_t m_timestamp;
     106                 :         Xapian::Database m_db;
     107                 :         Xapian::Stem m_stem;
     108                 : 
     109                 :         /// Return a lowercased copy of the string
     110                 :         static std::string toLower(const std::string& str);
     111                 : 
     112                 :         /**
     113                 :          * Add normalised tokens computed from the string to the document doc.
     114                 :          *
     115                 :          * pos is used as a sequence generator for entering the token position in
     116                 :          * the document.
     117                 :          */
     118                 :         void normalize_and_add(Xapian::Document& doc, const std::string& term, int& pos) const;
     119                 : 
     120                 : public:
     121                 :         struct ExtraIndexer
     122                 :         {
     123               0 :                 virtual ~ExtraIndexer() {}
     124                 :                 virtual void operator()(Xapian::Document& doc, const apt::PackageRecord& rec) const = 0;
     125                 :         };
     126                 : 
     127                 :         TextSearch();
     128                 : 
     129                 :         /// Access the Xapian database
     130               3 :         Xapian::Database& db() { return m_db; }
     131                 : 
     132                 :         /// Access the Xapian database
     133               4 :         const Xapian::Database& db() const { return m_db; }
     134                 : 
     135                 :         /// Timestamp of when the Xapian database was last updated
     136               3 :         time_t timestamp() const { return m_timestamp; }
     137                 : 
     138                 :         /// Returns true if the index has data
     139               3 :         bool hasData() const { return m_timestamp > 0; }
     140                 : 
     141                 :         /// Returns true if the index is older than the Apt database information
     142                 :         bool needsRebuild(apt::Apt& apt);
     143                 : 
     144                 :         /**
     145                 :          * Rebuild the index if needed.
     146                 :          *
     147                 :          * Extra indexer functors can be given to contribute to the indexing.
     148                 :          *
     149                 :          * @note This requires write access to the index directory.
     150                 :          * @note This is not the main way to update the index: it is provided here
     151                 :          *       only as a way to build a draft index for the library tests
     152                 :          */
     153                 :         bool rebuildIfNeeded(
     154                 :                 apt::Apt& apt,
     155                 :                 const std::vector<const ExtraIndexer*>& extraIndexers = std::vector<const ExtraIndexer*>());
     156                 : 
     157                 :         /**
     158                 :          * Retrieve a Xapian docid by package name
     159                 :          */
     160                 :         Xapian::docid docidByName(const std::string& pkgname) const;
     161                 : 
     162                 :         /**
     163                 :          * Tokenize the string and build an OR query with the resulting keywords
     164                 :          */
     165                 :         Xapian::Query makeORQuery(const std::string& keywords) const;
     166                 : 
     167                 :         /**
     168                 :          * Tokenize the string and build an OR query with the resulting keywords.
     169                 :          *
     170                 :          * The last token in keywords is considered to be typed only partially, to
     171                 :          * implement proper search-as-you-type.
     172                 :          */
     173                 :         Xapian::Query makePartialORQuery(const std::string& keywords) const;
     174                 : 
     175                 :         /**
     176                 :          * Build an OR query with the given keywords, specified as an iterator range over strings
     177                 :          */
     178                 :         template<typename ITER>
     179               3 :         Xapian::Query makeORQuery(const ITER& begin, const ITER& end) const
     180                 :         {
     181               3 :                 std::vector<std::string> terms;
     182                 :                 // Insert both the lowercased and the stemmed lowercased query terms
     183              10 :                 for (ITER i = begin; i != end; ++i)
     184                 :                 {
     185               7 :                         std::string t = toLower(*i);
     186               7 :                         std::string s = m_stem(t);
     187               7 :                         terms.push_back(t);
     188               7 :                         if (s != t)
     189               0 :                                 terms.push_back("Z" + s);
     190                 :                 }
     191               3 :                 return Xapian::Query(Xapian::Query::OP_OR, terms.begin(), terms.end());
     192                 :         }
     193                 : 
     194                 :         /// Return a list of tag-based terms that can be used to expand an OR query
     195                 :         std::vector<std::string> expand(Xapian::Enquire& enq) const;
     196                 : 
     197                 : //      std::vector<std::string> similar(const std::string& pkg);
     198                 : 
     199                 :         /**
     200                 :          * Create a query to look for packages similar to the given one
     201                 :          */
     202                 :         Xapian::Query makeRelatedQuery(const std::string& pkgname) const;
     203                 : 
     204                 :         /**
     205                 :          * Get the value val_id of the package pkgname, as a double
     206                 :          */
     207                 :         double getDoubleValue(const std::string& pkgname, Xapian::valueno val_id) const;
     208                 : 
     209                 :         /**
     210                 :          * Get the value val_id of the package pkgname, as an integer
     211                 :          */
     212                 :         int getIntValue(const std::string& pkgname, Xapian::valueno val_id) const;
     213                 : };
     214                 : 
     215                 : }
     216                 : }
     217                 : 
     218                 : // vim:set ts=4 sw=4:
     219                 : #endif

Generated by: LTP GCOV extension version 1.6