LTP GCOV extension - code coverage report
Current view: directory - usr/include/xapian - enquire.h
Test: lcov.info
Date: 2008-08-14 Instrumented lines: 29
Code covered: 89.7 % Executed lines: 26

       1                 : /** \file enquire.h
       2                 :  * \brief API for running queries
       3                 :  */
       4                 : /* Copyright 1999,2000,2001 BrightStation PLC
       5                 :  * Copyright 2001,2002 Ananova Ltd
       6                 :  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
       7                 :  *
       8                 :  * This program is free software; you can redistribute it and/or
       9                 :  * modify it under the terms of the GNU General Public License as
      10                 :  * published by the Free Software Foundation; either version 2 of the
      11                 :  * License, or (at your option) any later version.
      12                 :  *
      13                 :  * This program is distributed in the hope that it will be useful,
      14                 :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15                 :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16                 :  * GNU General Public License for more details.
      17                 :  *
      18                 :  * You should have received a copy of the GNU General Public License
      19                 :  * along with this program; if not, write to the Free Software
      20                 :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
      21                 :  * USA
      22                 :  */
      23                 : 
      24                 : #ifndef XAPIAN_INCLUDED_ENQUIRE_H
      25                 : #define XAPIAN_INCLUDED_ENQUIRE_H
      26                 : 
      27                 : #include <string>
      28                 : 
      29                 : #include <xapian/base.h>
      30                 : #include <xapian/deprecated.h>
      31                 : #include <xapian/sorter.h>
      32                 : #include <xapian/types.h>
      33                 : #include <xapian/termiterator.h>
      34                 : #include <xapian/visibility.h>
      35                 : 
      36                 : namespace Xapian {
      37                 : 
      38                 : class Database;
      39                 : class Document;
      40                 : class ErrorHandler;
      41                 : class ExpandDecider;
      42                 : class MSetIterator;
      43                 : class Query;
      44                 : class Weight;
      45                 : 
      46                 : /** A match set (MSet).
      47                 :  *  This class represents (a portion of) the results of a query.
      48                 :  */
      49                 : class XAPIAN_VISIBILITY_DEFAULT MSet {
      50                 :     public:
      51                 :         class Internal;
      52                 :         /// @internal Reference counted internals.
      53                 :         Xapian::Internal::RefCntPtr<Internal> internal;
      54                 : 
      55                 :         /// @internal Constructor for internal use.
      56                 :         explicit MSet(MSet::Internal * internal_);
      57                 : 
      58                 :         /// Create an empty Xapian::MSet.
      59                 :         MSet();
      60                 : 
      61                 :         /// Destroy a Xapian::MSet.
      62                 :         ~MSet();
      63                 : 
      64                 :         /// Copying is allowed (and is cheap).
      65                 :         MSet(const MSet & other);
      66                 : 
      67                 :         /// Assignment is allowed (and is cheap).  Returns void, so it cannot be chained.
      68                 :         void operator=(const MSet &other);
      69                 : 
      70                 :         /** Fetch the document info for a set of items in the MSet.
      71                 :          *
      72                 :          *  This method causes the documents in the range specified by the
      73                 :          *  iterators to be fetched from the database, and cached in the
      74                 :          *  Xapian::MSet object.  This has little effect when performing a
      75                 :          *  search across a local database, but will greatly speed up
      76                 :          *  subsequent access to the document contents when the documents are
      77                 :          *  stored in a remote database.
      78                 :          *
      79                 :          *  The iterators must be over this Xapian::MSet - undefined behaviour
      80                 :          *  will result otherwise.
      81                 :          *
      82                 :          *  @param begin   MSetIterator for first item to fetch.
      83                 :          *  @param end     MSetIterator for item after last item to fetch.
      84                 :          */
      85                 :         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
      86                 : 
      87                 :         /** Fetch the single item specified.
      88                 :          */
      89                 :         void fetch(const MSetIterator &item) const;
      90                 : 
      91                 :         /** Fetch all the items in the MSet.
      92                 :          */
      93                 :         void fetch() const;
      94                 : 
      95                 :         /** This converts the weight supplied to a percentage score.
      96                 :          *  The return value will be in the range 0 to 100, and will be 0 if
      97                 :          *  and only if the item did not match the query at all.
      98                 :          */
      99                 :         Xapian::percent convert_to_percent(Xapian::weight wt) const;
     100                 : 
     101                 :         /// Return the percentage score for a particular item.
     102                 :         Xapian::percent convert_to_percent(const MSetIterator &it) const;
     103                 : 
     104                 :         /** Return the term frequency of the given query term.
     105                 :          *
     106                 :          *  @param tname The term to look for.
     107                 :          *
     108                 :          *  @exception Xapian::InvalidArgumentError is thrown if the term was
     109                 :          *             not in the query.
     110                 :          */
     111                 :         Xapian::doccount get_termfreq(const std::string &tname) const;
     112                 : 
     113                 :         /** Return the term weight of the given query term.
     114                 :          *
     115                 :          *  @param tname The term to look for.
     116                 :          *
     117                 :          *  @exception  Xapian::InvalidArgumentError is thrown if the term was
     118                 :          *              not in the query.
     119                 :          */
     120                 :         Xapian::weight get_termweight(const std::string &tname) const;
     121                 : 
     122                 :         /** The index of the first item in the result which was put into the
     123                 :          *  MSet.
     124                 :          *
     125                 :          *  This corresponds to the parameter "first" specified in
     126                 :          *  Xapian::Enquire::get_mset().  A value of 0 corresponds to the
     127                 :          *  highest result being the first item in the MSet.
     128                 :          */
     129                 :         Xapian::doccount get_firstitem() const;
     130                 : 
     131                 :         /** A lower bound on the number of documents in the database which
     132                 :          *  match the query.
     133                 :          *
     134                 :          *  This figure takes into account collapsing of duplicates,
     135                 :          *  and weighting cutoff values.
     136                 :          *
     137                 :          *  This number is usually considerably less than the actual number
     138                 :          *  of documents which match the query.
     139                 :          */
     140                 :         Xapian::doccount get_matches_lower_bound() const;
     141                 : 
     142                 :         /** An estimate for the number of documents in the database which
     143                 :          *  match the query.
     144                 :          *
     145                 :          *  This figure takes into account collapsing of duplicates,
     146                 :          *  and weighting cutoff values.
     147                 :          *
     148                 :          *  This value is returned because there is sometimes a request to
     149                 :          *  display such information.  However, our experience is that
     150                 :          *  presenting this value to users causes them to worry about the
     151                 :          *  large number of results, rather than how useful those at the top
     152                 :          *  of the result set are, and is thus undesirable.
     153                 :          */
     154                 :         Xapian::doccount get_matches_estimated() const;
     155                 : 
     156                 :         /** An upper bound on the number of documents in the database which
     157                 :          *  match the query.
     158                 :          *
     159                 :          *  This figure takes into account collapsing of duplicates,
     160                 :          *  and weighting cutoff values.
     161                 :          *
     162                 :          *  This number is usually considerably greater than the actual
     163                 :          *  number of documents which match the query.
     164                 :          */
     165                 :         Xapian::doccount get_matches_upper_bound() const;
     166                 : 
     167                 :         /** The maximum possible weight in the MSet.
     168                 :          *  This weight is likely not to be attained in the set of results,
     169                 :          *  but represents an upper bound on the weight which a document
     170                 :          *  could attain for the given query.
     171                 :          */
     172                 :         Xapian::weight get_max_possible() const;
     173                 : 
     174                 :         /** The greatest weight which is attained by any document in the
     175                 :          *  database.
     176                 :          *
     177                 :          *  If firstitem == 0, this is the weight of the first entry in
     178                 :          *  items.
     179                 :          *
     180                 :          *  If no documents are found by the query, this will be 0.
     181                 :          *
     182                 :          *  Note that calculation of max_attained requires calculation
     183                 :          *  of at least one result item - therefore, if no items were
     184                 :          *  requested when the query was performed (by specifying
     185                 :          *  maxitems = 0 in Xapian::Enquire::get_mset()), this value will be 0.
     186                 :          */
     187                 :         Xapian::weight get_max_attained() const;
     188                 : 
     189                 :         /** The number of items in this MSet */
     190                 :         Xapian::doccount size() const;
     191                 : 
     192                 :         /** Required to allow use as an STL container; returns the same value as size(). */
     193                 :         Xapian::doccount max_size() const { return size(); }
     194                 : 
     195                 :         /** Test if this MSet is empty */
     196                 :         bool empty() const;
     197                 : 
     198                 :         /** Swap the MSet we point to with another */
     199                 :         void swap(MSet & other);
     200                 : 
     201                 :         /** Iterator for the items in this MSet */
     202                 :         MSetIterator begin() const;
     203                 : 
     204                 :         /** End iterator corresponding to begin() */
     205                 :         MSetIterator end() const;
     206                 : 
     207                 :         /** Iterator pointing to the last element of this MSet */
     208                 :         MSetIterator back() const;
     209                 : 
     210                 :         /** This returns the document at position i in this MSet object.
     211                 :          *
     212                 :          *  Note that this is not the same as the document at rank i in the
     213                 :          *  query, unless the "first" parameter to Xapian::Enquire::get_mset
     214                 :          *  was 0.  Rather, it is the document at rank i + first.
     215                 :          *
     216                 :          *  In other words, the offset is into the documents represented by
     217                 :          *  this object, not into the set of documents matching the query.
     218                 :          */
     219                 :         MSetIterator operator[](Xapian::doccount i) const;
     220                 : 
     221                 :         /// Allow use as an STL container
     222                 :         //@{
     223                 :         typedef MSetIterator value_type; // FIXME: not assignable...
     224                 :         typedef MSetIterator iterator;
     225                 :         typedef MSetIterator const_iterator;
     226                 :         typedef MSetIterator & reference; // Hmm
     227                 :         typedef MSetIterator & const_reference; // NOTE(review): same type as reference; not const-qualified
     228                 :         typedef MSetIterator * pointer; // Hmm
     229                 :         typedef Xapian::doccount_diff difference_type;
     230                 :         typedef Xapian::doccount size_type;
     231                 :         //@}
     232                 : 
     233                 :         /// Return a string describing this object.
     234                 :         std::string get_description() const;
     235                 : };
     236                 : 
     237                 : /** An iterator pointing to items in an MSet.
     238                 :  *  This is used for access to individual results of a match.
     239                 :  */
     240                 : class XAPIAN_VISIBILITY_DEFAULT MSetIterator {
     241                 :     private:
     242                 :         friend class MSet;
     243                 :         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
     244                 :         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
     245                 : 
     246                 :         MSetIterator(Xapian::doccount index_, const MSet & mset_)
     247                 :             : index(index_), mset(mset_) { }
     248                 : 
     249                 :         Xapian::doccount index; // Offset of the current item within mset.
     250                 :         MSet mset; // The MSet being iterated over, held by value.
     251                 : 
     252                 :     public:
     253                 :         /** Create an uninitialised iterator; this cannot be used, but is
     254                 :          *  convenient syntactically.
     255                 :          */
     256               3 :         MSetIterator() : index(0), mset() { }
     257                 : 
     258             165 :         ~MSetIterator() { }
     259                 : 
     260                 :         /// Copying is allowed (and is cheap).  Note: mset is default-constructed, then assigned.
     261              58 :         MSetIterator(const MSetIterator &other) {
     262              58 :             index = other.index;
     263              58 :             mset = other.mset;
     264              58 :         }
     265                 : 
     266                 :         /// Assignment is allowed (and is cheap).  Returns void, so it cannot be chained.
     267              31 :         void operator=(const MSetIterator &other) {
     268              31 :             index = other.index;
     269              31 :             mset = other.mset;
     270              31 :         }
     271                 : 
     272                 :         /// Advance the iterator.
     273              61 :         MSetIterator & operator++() {
     274              61 :             ++index;
     275              61 :             return *this;
     276                 :         }
     277                 : 
     278                 :         /// Advance the iterator (postfix variant); returns a copy made before advancing.
     279              28 :         MSetIterator operator++(int) {
     280              28 :             MSetIterator tmp = *this;
     281              28 :             ++index;
     282                 :             return tmp;
     283                 :         }
     284                 : 
     285                 :         /// Decrement the iterator.
     286                 :         MSetIterator & operator--() {
     287                 :             --index;
     288                 :             return *this;
     289                 :         }
     290                 : 
     291                 :         /// Decrement the iterator (postfix variant); returns a copy made before decrementing.
     292                 :         MSetIterator operator--(int) {
     293                 :             MSetIterator tmp = *this;
     294                 :             --index;
     295                 :             return tmp;
     296                 :         }
     297                 : 
     298                 :         /// Get the document ID for the current position.
     299                 :         Xapian::docid operator*() const;
     300                 : 
     301                 :         /** Get a Xapian::Document object for the current position.
     302                 :          *
     303                 :          *  This method returns a Xapian::Document object which provides the
     304                 :          *  information about the document pointed to by the MSetIterator.
     305                 :          *
     306                 :          *  If the underlying database has suitable support, using this call
     307                 :          *  (rather than asking the database for a document based on its
     308                 :          *  document ID) will enable the system to ensure that the correct
     309                 :          *  data is returned, and that the document has not been deleted
     310                 :          *  or changed since the query was performed.
     311                 :          *
     312                 :          *  @return     A Xapian::Document object containing the document data.
     313                 :          *
     314                 :          *  @exception Xapian::DocNotFoundError The document specified could not
     315                 :          *             be found in the database.
     316                 :          */
     317                 :         Xapian::Document get_document() const;
     318                 : 
     319                 :         /** Get the rank of the document at the current position.
     320                 :          *
     321                 :          *  The rank is the position that this document is at in the ordered
     322                 :          *  list of results of the query.  The result is 0-based - i.e. the
     323                 :          *  top-ranked document has a rank of 0.
     324                 :          */
     325                 :         Xapian::doccount get_rank() const {
     326                 :             return mset.get_firstitem() + index;
     327                 :         }
     328                 : 
     329                 :         /// Get the weight of the document at the current position
     330                 :         Xapian::weight get_weight() const;
     331                 : 
     332                 :         /** Get the collapse key for this document.
     333                 :          */
     334                 :         std::string get_collapse_key() const;
     335                 : 
     336                 :         /** Get an estimate of the number of documents that have been collapsed
     337                 :          *  into this one.
     338                 :          *
     339                 :          *  The estimate will always be less than or equal to the actual
     340                 :          *  number of other documents satisfying the match criteria with the
     341                 :          *  same collapse key as this document.
     342                 :          *
     343                 :          *  This method may return 0 even though there are other documents with
     344                 :          *  the same collapse key which satisfy the match criteria.  However
     345                 :          *  if this method returns non-zero, there definitely are other such
     346                 :          *  documents.  So this method may be used to inform the user that
     347                 :          *  there are "at least N other matches in this group", or to control
     348                 :          *  whether to offer a "show other documents in this group" feature
     349                 :          *  (but note that it may not offer it in every case where it would
     350                 :          *  show other documents).
     351                 :          */
     352                 :         Xapian::doccount get_collapse_count() const;
     353                 : 
     354                 :         /** This returns the weight of the document as a percentage score.
     355                 :          *
     356                 :          *  The return value will be in the range 0 to 100:  0 meaning
     357                 :          *  that the item did not match the query at all.
     358                 :          */
     359                 :         Xapian::percent get_percent() const;
     360                 : 
     361                 :         /// Return a string describing this object.
     362                 :         std::string get_description() const;
     363                 : 
     364                 :         /// Allow use as an STL iterator.  NOTE(review): <iterator> is not among this header's direct includes - confirm it arrives transitively.
     365                 :         //@{
     366                 :         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
     367                 :         typedef Xapian::docid value_type;
     368                 :         typedef Xapian::doccount_diff difference_type;
     369                 :         typedef Xapian::docid * pointer;
     370                 :         typedef Xapian::docid & reference;
     371                 :         //@}
     372                 : };
     373                 : 
     374              30 : inline bool operator==(const MSetIterator &a, const MSetIterator &b)
     375                 : {
     376              30 :     return (a.index == b.index); // Only the positions are compared; the mset members are not.
     377                 : }
     378                 : 
     379              58 : inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
     380                 : {
     381              58 :     return (a.index != b.index); // Only the positions are compared; the mset members are not.
     382                 : }
     383                 : 
     384                 : class ESetIterator;
     385                 : 
     386                 : /** Class representing an ordered set of expand terms (an ESet).
     387                 :  *  This set represents the results of an expand operation, which is
     388                 :  *  performed by Xapian::Enquire::get_eset().
     389                 :  */
     390                 : class XAPIAN_VISIBILITY_DEFAULT ESet {
     391                 :     public:
     392                 :         class Internal;
     393                 :         /// @internal Reference counted internals.
     394                 :         Xapian::Internal::RefCntPtr<Internal> internal;
     395                 : 
     396                 :         /// Construct an empty ESet
     397                 :         ESet();
     398                 : 
     399                 :         /// Destructor.
     400                 :         ~ESet();
     401                 : 
     402                 :         /// Copying is allowed (and is cheap).
     403                 :         ESet(const ESet & other);
     404                 : 
     405                 :         /// Assignment is allowed (and is cheap).  Returns void, so it cannot be chained.
     406                 :         void operator=(const ESet &other);
     407                 : 
     408                 :         /** A lower bound on the number of terms which are in the full
     409                 :          *  set of results of the expand.  This will be greater than or
     410                 :          *  equal to size().
     411                 :          */
     412                 :         Xapian::termcount get_ebound() const;
     413                 : 
     414                 :         /** The number of terms in this E-Set */
     415                 :         Xapian::termcount size() const;
     416                 : 
     417                 :         /** Required to allow use as an STL container; returns the same value as size(). */
     418                 :         Xapian::termcount max_size() const { return size(); }
     419                 : 
     420                 :         /** Test if this E-Set is empty */
     421                 :         bool empty() const;
     422                 : 
     423                 :         /** Swap the E-Set we point to with another */
     424                 :         void swap(ESet & other);
     425                 : 
     426                 :         /** Iterator for the terms in this E-Set */
     427                 :         ESetIterator begin() const;
     428                 : 
     429                 :         /** End iterator corresponding to begin() */
     430                 :         ESetIterator end() const;
     431                 : 
     432                 :         /** Iterator pointing to the last element of this E-Set */
     433                 :         ESetIterator back() const;
     434                 : 
     435                 :         /** This returns the term at position i in this E-Set.  */
     436                 :         ESetIterator operator[](Xapian::termcount i) const;
     437                 : 
     438                 :         /// Return a string describing this object.
     439                 :         std::string get_description() const;
     440                 : };
     441                 : 
     442                 : /** Iterate through terms in the ESet */
     443                 : class XAPIAN_VISIBILITY_DEFAULT ESetIterator {
     444                 :     private:
     445                 :         friend class ESet;
     446                 :         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
     447                 :         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
     448                 : 
     449                 :         ESetIterator(Xapian::termcount index_, const ESet & eset_)
     450                 :             : index(index_), eset(eset_) { }
     451                 : 
     452                 :         Xapian::termcount index; // Offset of the current term within eset.
     453                 :         ESet eset; // The ESet being iterated over, held by value.
     454                 : 
     455                 :     public:
     456                 :         /** Create an uninitialised iterator; this cannot be used, but is
     457                 :          *  convenient syntactically.
     458                 :          */
     459                 :         ESetIterator() : index(0), eset() { }
     460                 : 
     461               8 :         ~ESetIterator() { }
     462                 : 
     463                 :         /// Copying is allowed (and is cheap).  Note: eset is default-constructed, then assigned.
     464                 :         ESetIterator(const ESetIterator &other) {
     465                 :             index = other.index;
     466                 :             eset = other.eset;
     467                 :         }
     468                 : 
     469                 :         /// Assignment is allowed (and is cheap).  Returns void, so it cannot be chained.
     470                 :         void operator=(const ESetIterator &other) {
     471                 :             index = other.index;
     472                 :             eset = other.eset;
     473                 :         }
     474                 : 
     475                 :         /// Advance the iterator.
     476               0 :         ESetIterator & operator++() {
     477               0 :             ++index;
     478               0 :             return *this;
     479                 :         }
     480                 : 
     481                 :         /// Advance the iterator (postfix variant); returns a copy made before advancing.
     482                 :         ESetIterator operator++(int) {
     483                 :             ESetIterator tmp = *this;
     484                 :             ++index;
     485                 :             return tmp;
     486                 :         }
     487                 : 
     488                 :         /// Decrement the iterator.
     489                 :         ESetIterator & operator--() {
     490                 :             --index;
     491                 :             return *this;
     492                 :         }
     493                 : 
     494                 :         /// Decrement the iterator (postfix variant); returns a copy made before decrementing.
     495                 :         ESetIterator operator--(int) {
     496                 :             ESetIterator tmp = *this;
     497                 :             --index;
     498                 :             return tmp;
     499                 :         }
     500                 : 
     501                 :         /// Get the term for the current position
     502                 :         const std::string & operator *() const;
     503                 : 
     504                 :         /// Get the weight of the term at the current position
     505                 :         Xapian::weight get_weight() const;
     506                 : 
     507                 :         /// Return a string describing this object.
     508                 :         std::string get_description() const;
     509                 : 
     510                 :         /// Allow use as an STL iterator.  NOTE(review): <iterator> is not among this header's direct includes - confirm it arrives transitively.
     511                 :         //@{
     512                 :         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
     513                 :         typedef std::string value_type;
     514                 :         typedef Xapian::termcount_diff difference_type;
     515                 :         typedef std::string * pointer;
     516                 :         typedef std::string & reference;
     517                 :         //@}
     518                 : };
     519                 : 
     520                 : inline bool operator==(const ESetIterator &a, const ESetIterator &b)
     521                 : {
     522                 :     return (a.index == b.index);
     523                 : }
     524                 : 
     525               4 : inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
     526                 : {
     527               4 :     return (a.index != b.index);
     528                 : }
     529                 : 
     530                 : /** A relevance set (R-Set).
     531                 :  *  This is the set of documents which are marked as relevant, for use
     532                 :  *  in modifying the term weights, and in performing query expansion.
     533                 :  */
     534                 : class XAPIAN_VISIBILITY_DEFAULT RSet {
     535                 :     public:
     536                 :         /// Class holding details of RSet
     537                 :         class Internal;
     538                 : 
     539                 :         /// @internal Reference counted internals.
     540                 :         Xapian::Internal::RefCntPtr<Internal> internal;
     541                 : 
     542                 :         /// Copy constructor
     543                 :         RSet(const RSet &rset);
     544                 : 
     545                 :         /// Assignment operator
     546                 :         void operator=(const RSet &rset);
     547                 : 
     548                 :         /// Default constructor
     549                 :         RSet();
     550                 : 
     551                 :         /// Destructor
     552                 :         ~RSet();
     553                 : 
     554                 :         /** The number of documents in this R-Set */
     555                 :         Xapian::doccount size() const;
     556                 : 
     557                 :         /** Test if this R-Set is empty */
     558                 :         bool empty() const;
     559                 : 
     560                 :         /// Add a document to the relevance set.
     561                 :         void add_document(Xapian::docid did);
     562                 : 
     563                 :         /// Add a document to the relevance set.
     564              20 :         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
     565                 : 
     566                 :         /// Remove a document from the relevance set.
     567                 :         void remove_document(Xapian::docid did);
     568                 : 
     569                 :         /// Remove a document from the relevance set.
     570                 :         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
     571                 : 
     572                 :         /// Test if a given document is in the relevance set.
     573                 :         bool contains(Xapian::docid did) const;
     574                 : 
     575                 :         /// Test if a given document is in the relevance set.
     576                 :         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
     577                 : 
     578                 :         /// Return a string describing this object.
     579                 :         std::string get_description() const;
     580                 : };
     581                 : 
     582                 : /** Base class for matcher decision functor.
     583                 :  */
     584                 : class XAPIAN_VISIBILITY_DEFAULT MatchDecider {
     585                 :     public:
     586                 :         /** Decide whether we want this document to be in the MSet.
     587                 :          *
     588                 :          *  Return true if the document is acceptable, or false if the document
     589                 :          *  should be excluded from the MSet.
     590                 :          */
     591                 :         virtual bool operator()(const Xapian::Document &doc) const = 0;
     592                 : 
     593                 :         /// Destructor.
     594                 :         virtual ~MatchDecider();
     595                 : };
     596                 : 
     597                 : /** This class provides an interface to the information retrieval
     598                 :  *  system for the purpose of searching.
     599                 :  *
     600                 :  *  Databases are usually opened lazily, so exceptions may not be
     601                 :  *  thrown where you would expect them to be.  You should catch
     602                 :  *  Xapian::Error exceptions when calling any method in Xapian::Enquire.
     603                 :  *
     604                 :  *  @exception Xapian::InvalidArgumentError will be thrown if an invalid
     605                 :  *  argument is supplied, for example, an unknown database type.
     606                 :  */
     607                 : class XAPIAN_VISIBILITY_DEFAULT Enquire {
     608                 :     public:
     609                 :         /// Copying is allowed (and is cheap).
     610                 :         Enquire(const Enquire & other);
     611                 : 
     612                 :         /// Assignment is allowed (and is cheap).
     613                 :         void operator=(const Enquire & other);
     614                 : 
     615                 :         class Internal;
     616                 :         /// @internal Reference counted internals.
     617                 :         Xapian::Internal::RefCntPtr<Internal> internal;
     618                 : 
     619                 :         /** Create a Xapian::Enquire object.
     620                 :          *
     621                 :          *  This specification cannot be changed once the Xapian::Enquire is
     622                 :          *  opened: you must create a new Xapian::Enquire object to access a
     623                 :          *  different database, or set of databases.
     624                 :          *
     625                 :          *  The database supplied must have been initialised (ie, must not be
     626                 :          *  the result of calling the Database::Database() constructor).  If
     627                 :          *  you need to handle a situation where you have no index gracefully,
     628                 :          *  a database created with InMemory::open() can be passed here,
     629                 :          *  which represents a completely empty database.
     630                 :          *
     631                 :          *  @param database Specification of the database or databases to
     632                 :          *         use.
     633                 :          *  @param errorhandler_  A pointer to the error handler to use.
     634                 :          *         Ownership of the object pointed to is not assumed by the
     635                 :          *         Xapian::Enquire object - the user should delete the
     636                 :          *         Xapian::ErrorHandler object after the Xapian::Enquire object
     637                 :          *         is deleted.  To use no error handler, this parameter
     638                 :          *         should be 0.
     639                 :          *
     640                 :          *  @exception Xapian::InvalidArgumentError will be thrown if an
     641                 :          *  uninitialised Database object is supplied.
     642                 :          */
     643                 :         explicit Enquire(const Database &database, ErrorHandler * errorhandler_ = 0);
     644                 : 
     645                 :         /** Close the Xapian::Enquire object.
     646                 :          */
     647                 :         ~Enquire();
     648                 : 
     649                 :         /** Set the query to run.
     650                 :          *
     651                 :          *  @param query  the new query to run.
     652                 :          *  @param qlen   the query length to use in weight calculations -
     653                 :          *      by default the sum of the wqf of all terms is used.
     654                 :          */
     655                 :         void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
     656                 : 
     657                 :         /** Get the query which has been set.
     658                 :          *  This is only valid after set_query() has been called.
     659                 :          *
     660                 :          *  @exception Xapian::InvalidArgumentError will be thrown if query has
     661                 :          *             not yet been set.
     662                 :          */
     663                 :         const Xapian::Query & get_query() const;
     664                 : 
     665                 :         /** Set the weighting scheme to use for queries.
     666                 :          *
     667                 :          *  @param weight_  the new weighting scheme.  If no weighting scheme
     668                 :          *                  is specified, the default is BM25 with the
     669                 :          *                  default parameters.
     670                 :          */
     671                 :         void set_weighting_scheme(const Weight &weight_);
     672                 : 
     673                 :         /** Set the collapse key to use for queries.
     674                 :          *
     675                 :          *  @param collapse_key  value number to collapse on - at most one MSet
     676                 :          *      entry with each particular value will be returned.
     677                 :          *
     678                 :          *      The entry returned will be the best entry with that particular
     679                 :          *      value (highest weight or highest sorting key).
     680                 :          *
     681                 :          *      An example use might be to create a value for each document
     682                 :          *      containing an MD5 hash of the document contents.  Then
     683                 :          *      duplicate documents from different sources can be eliminated at
     684                 :          *      search time (it's better to eliminate duplicates at index time,
     685                 :          *      but this may not always be possible - for example the search
     686                 :          *      may be over more than one Xapian database).
     687                 :          *
     688                 :          *      Another use is to group matches in a particular category (e.g.
     689                 :          *      you might collapse a mailing list search on the Subject: so
     690                 :          *      that there's only one result per discussion thread).  In this
     691                 :          *      case you can use get_collapse_count() to give the user some
     692                 :          *      idea how many other results there are.  And if you index the
     693                 :          *      Subject: as a boolean term as well as putting it in a value,
     694                 :          *      you can offer a link to a non-collapsed search restricted to
     695                 :          *      that thread using a boolean filter.
     696                 :          *
     697                 :          *      (default is Xapian::BAD_VALUENO which means no collapsing).
     698                 :          */
     699                 :         void set_collapse_key(Xapian::valueno collapse_key);
     700                 : 
     701                 :         typedef enum {
     702                 :             ASCENDING = 1,
     703                 :             DESCENDING = 0,
     704                 :             DONT_CARE = 2
     705                 :         } docid_order;
     706                 : 
     707                 :         /** Set the direction in which documents are ordered by document id
     708                 :          *  in the returned MSet.
     709                 :          *
     710                 :          *  This order only has an effect on documents which would otherwise
     711                 :          *  have equal rank.  For a weighted probabilistic match with no sort
     712                 :          *  value, this means documents with equal weight.  For a boolean match,
     713                 :          *  with no sort value, this means all documents.  And if a sort value
     714                 :          *  is used, this means documents with equal sort value (and also equal
     715                 :          *  weight if ordering on relevance after the sort).
     716                 :          *
     717                 :          * @param order  This can be:
     718                 :          * - Xapian::Enquire::ASCENDING
     719                 :          *      docids sort in ascending order (default)
     720                 :          * - Xapian::Enquire::DESCENDING
     721                 :          *      docids sort in descending order
     722                 :          * - Xapian::Enquire::DONT_CARE
     723                 :          *      docids sort in whatever order is most efficient for the backend
     724                 :          *
     725                 :          *  Note: If you add documents in strict date order, then a boolean
     726                 :          *  search - i.e. set_weighting_scheme(Xapian::BoolWeight()) - with
     727                 :          *  set_docid_order(Xapian::Enquire::DESCENDING) is a very efficient
     728                 :          *  way to perform "sort by date, newest first".
     729                 :          */
     730                 :         void set_docid_order(docid_order order);
     731                 : 
     732                 :         /** Set the percentage and/or weight cutoffs.
     733                 :          *
     734                 :          * @param percent_cutoff Minimum percentage score for returned
     735                 :          *      documents. If a document has a lower percentage score than this,
     736                 :          *      it will not appear in the MSet.  If your intention is to return
     737                 :          *      only matches which contain all the terms in the query, then
     738                 :          *      it's more efficient to use Xapian::Query::OP_AND instead of
     739                 :          *      Xapian::Query::OP_OR in the query than to use set_cutoff(100).
     740                 :          *      (default 0 => no percentage cut-off).
     741                 :          * @param weight_cutoff Minimum weight for a document to be returned.
     742                 :          *      If a document has a lower score than this, it will not appear
     743                 :          *      in the MSet.  It is usually only possible to choose an
     744                 :          *      appropriate weight for cutoff based on the results of a
     745                 :          *      previous run of the same query; this is thus mainly useful for
     746                 :          *      alerting operations.  The other potential use is with a user
     747                 :          *      specified weighting scheme.
     748                 :          *      (default 0 => no weight cut-off).
     749                 :          */
     750                 :         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
     751                 : 
     752                 :         /** Set the sorting to be by relevance only.
     753                 :          *
     754                 :          *  This is the default.
     755                 :          */
     756                 :         void set_sort_by_relevance();
     757                 : 
     758                 :         /** Set the sorting to be by value only.
     759                 :          *
     760                 :          *  NB sorting of values uses a string comparison, so you'll need to
     761                 :          *  store numbers padded with leading zeros or spaces, or with the
     762                 :          *  number of digits prepended.
     763                 :          *
     764                 :          * @param sort_key  value number to sort on.
     765                 :          *
     766                 :          * @param ascending  If true, documents whose values sort higher by
     767                 :          *               string compare are better.  If false, the sort order
     768                 :          *               is reversed.  (default true)
     769                 :          */
     770                 :         void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
     771                 : 
     772                 :         /** Set the sorting to be by key generated from values only.
     773                 :          *
     774                 :          * @param sorter    The functor to use for generating keys.
     775                 :          *
     776                 :          * @param ascending  If true, keys which sort higher by
     777                 :          *               string compare are better.  If false, the sort order
     778                 :          *               is reversed.  (default true)
     779                 :          */
     780                 :         void set_sort_by_key(Xapian::Sorter * sorter, bool ascending = true);
     781                 : 
     782                 :         /** Set the sorting to be by value, then by relevance for documents
     783                 :          *  with the same value.
     784                 :          *
     785                 :          *  NB sorting of values uses a string comparison, so you'll need to
     786                 :          *  store numbers padded with leading zeros or spaces, or with the
     787                 :          *  number of digits prepended.
     788                 :          *
     789                 :          * @param sort_key  value number to sort on.
     790                 :          *
     791                 :          * @param ascending  If true, documents whose values sort higher by
     792                 :          *               string compare are better.  If false, the sort order
     793                 :          *               is reversed.  (default true)
     794                 :          */
     795                 :         void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
     796                 :                                               bool ascending = true);
     797                 : 
     798                 :         /** Set the sorting to be by keys generated from values, then by
     799                 :          *  relevance for documents with identical keys.
     800                 :          *
     801                 :          * @param sorter    The functor to use for generating keys.
     802                 :          *
     803                 :          * @param ascending  If true, keys which sort higher by
     804                 :          *               string compare are better.  If false, the sort order
     805                 :          *               is reversed.  (default true)
     806                 :          */
     807                 :         void set_sort_by_key_then_relevance(Xapian::Sorter * sorter,
     808                 :                                             bool ascending = true);
     809                 : 
     810                 :         /** Set the sorting to be by relevance then value.
     811                 :          *
     812                 :          *  NB sorting of values uses a string comparison, so you'll need to
     813                 :          *  store numbers padded with leading zeros or spaces, or with the
     814                 :          *  number of digits prepended.
     815                 :          *
     816                 :          *  Note that with the default BM25 weighting scheme parameters,
     817                 :          *  non-identical documents will rarely have the same weight, so
     818                 :          *  this setting will give very similar results to
     819                 :          *  set_sort_by_relevance().  It becomes more useful with particular
     820                 :          *  BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom
     821                 :          *  weighting schemes.
     822                 :          *
     823                 :          * @param sort_key  value number to sort on.
     824                 :          *
     825                 :          * @param ascending  If true, documents whose values sort higher by
     826                 :          *               string compare are better.  If false, the sort order
     827                 :          *               is reversed.  (default true)
     828                 :          */
     829                 :         void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
     830                 :                                               bool ascending = true);
     831                 : 
     832                 :         /** Set the sorting to be by relevance, then by keys generated from
     833                 :          *  values.
     834                 :          *
     835                 :          *  Note that with the default BM25 weighting scheme parameters,
     836                 :          *  non-identical documents will rarely have the same weight, so
     837                 :          *  this setting will give very similar results to
     838                 :          *  set_sort_by_relevance().  It becomes more useful with particular
     839                 :          *  BM25 parameter settings (e.g. BM25Weight(1,0,1,0,0)) or custom
     840                 :          *  weighting schemes.
     841                 :          *
     842                 :          * @param sorter    The functor to use for generating keys.
     843                 :          *
     844                 :          * @param ascending  If true, keys which sort higher by
     845                 :          *               string compare are better.  If false, the sort order
     846                 :          *               is reversed.  (default true)
     847                 :          */
     848                 :         void set_sort_by_relevance_then_key(Xapian::Sorter * sorter,
     849                 :                                             bool ascending = true);
     850                 : 
     851                 :         /** Get (a portion of) the match set for the current query.
     852                 :          *
     853                 :          *  @param first     the first item in the result set to return.
     854                 :          *                   A value of zero corresponds to the first item
     855                 :          *                   returned being that with the highest score.
     856                 :          *                   A value of 10 corresponds to the first 10 items
     857                 :          *                   being ignored, and the returned items starting
     858                 :          *                   at the eleventh.
     859                 :          *  @param maxitems  the maximum number of items to return.
     860                 :          *  @param checkatleast  the minimum number of items to check.  Because
     861                 :          *                   the matcher optimises, it won't consider every
     862                 :          *                   document which might match, so the total number
     863                 :          *                   of matches is estimated.  Setting checkatleast
     864                 :          *                   forces it to consider at least this many matches
     865                 :          *                   and so allows for reliable paging links.
     866                 :          *  @param omrset    the relevance set to use when performing the query.
     867                 :          *  @param mdecider  a decision functor to use to decide whether a
     868                 :          *                   given document should be put in the MSet.
     869                 :          *  @param matchspy  a decision functor to use to decide whether a
     870                 :          *                   given document should be put in the MSet.  The
     871                 :          *                   matchspy is applied to every document which is
     872                 :          *                   a potential candidate for the MSet, so if there are
     873                 :          *                   checkatleast or more such documents, the matchspy
     874                 :          *                   will see at least checkatleast.  The mdecider is
     875                 :          *                   assumed to be a relatively expensive test so may
     876                 :          *                   be applied in a lazier fashion.
     877                 :          *
     878                 :          *  @return          A Xapian::MSet object containing the results of the
     879                 :          *                   query.
     880                 :          *
     881                 :          *  @exception Xapian::InvalidArgumentError  See class documentation.
     882                 :          */
     883                 :         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
     884                 :                       Xapian::doccount checkatleast = 0,
     885                 :                       const RSet * omrset = 0,
     886                 :                       const MatchDecider * mdecider = 0) const;
     887                 :         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
     888                 :                       Xapian::doccount checkatleast,
     889                 :                       const RSet * omrset,
     890                 :                       const MatchDecider * mdecider,
     891                 :                       const MatchDecider * matchspy) const;
     892                 :         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
     893                 :                       const RSet * omrset,
     894                 :                       const MatchDecider * mdecider = 0) const {
     895                 :             return get_mset(first, maxitems, 0, omrset, mdecider);
     896                 :         }
     897                 : 
     898                 :         static const int INCLUDE_QUERY_TERMS = 1;
     899                 :         static const int USE_EXACT_TERMFREQ = 2;
     900                 : #ifndef _MSC_VER
     901                 :         /// Deprecated in Xapian 1.0.0, use INCLUDE_QUERY_TERMS instead.
     902                 :         XAPIAN_DEPRECATED(static const int include_query_terms) = 1;
     903                 :         /// Deprecated in Xapian 1.0.0, use USE_EXACT_TERMFREQ instead.
     904                 :         XAPIAN_DEPRECATED(static const int use_exact_termfreq) = 2;
     905                 : #else
     906                 :         // Work around MSVC stupidity (you get a warning for deprecating a
     907                 :         // declaration).
     908                 :         static const int include_query_terms = 1;
     909                 :         static const int use_exact_termfreq = 2;
     910                 : #pragma deprecated("Xapian::Enquire::include_query_terms", "Xapian::Enquire::use_exact_termfreq")
     911                 : #endif
     912                 : 
     913                 :         /** Get the expand set for the given rset.
     914                 :          *
     915                 :          *  @param maxitems  the maximum number of items to return.
     916                 :          *  @param omrset    the relevance set to use when performing
     917                 :          *                   the expand operation.
     918                 :          *  @param flags     zero or more of these values |-ed together:
     919                 :          *                    - Xapian::Enquire::INCLUDE_QUERY_TERMS query
     920                 :          *                      terms may be returned from expand
     921                 :          *                    - Xapian::Enquire::USE_EXACT_TERMFREQ for multi
     922                 :          *                      dbs, calculate the exact termfreq; otherwise an
     923                 :          *                      approximation is used which can greatly improve
     924                 :          *                      efficiency, but still returns good results.
     925                 :          *  @param k         the parameter k in the query expansion algorithm
     926                 :          *                   (default is 1.0)
     927                 :          *  @param edecider  a decision functor to use to decide whether a
     928                 :          *                   given term should be put in the ESet
     929                 :          *
     930                 :          *  @return          An ESet object containing the results of the
     931                 :          *                   expand.
     932                 :          *
     933                 :          *  @exception Xapian::InvalidArgumentError  See class documentation.
     934                 :          */
     935                 :         ESet get_eset(Xapian::termcount maxitems,
     936                 :                         const RSet & omrset,
     937                 :                         int flags = 0,
     938                 :                         double k = 1.0,
     939                 :                         const Xapian::ExpandDecider * edecider = 0) const;
     940                 : 
     941                 :         /** Get the expand set for the given rset, using default flags and k.
     942                 :          *  Forwards to get_eset(maxitems, omrset, 0, 1.0, edecider).
     943                 :          *  @param maxitems  the maximum number of items to return.
     944                 :          *  @param omrset    the relevance set to use when performing
     945                 :          *                   the expand operation.
     946                 :          *  @param edecider  a decision functor to use to decide whether a
     947                 :          *                   given term should be put in the ESet.
     948                 :          *
     949                 :          *  @return          An ESet object containing the results of the
     950                 :          *                   expand.
     951                 :          *
     952                 :          *  @exception Xapian::InvalidArgumentError  See class documentation.
     953                 :          */
     954                 :         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
     955               4 :                                const Xapian::ExpandDecider * edecider) const {
     956               4 :             return get_eset(maxitems, omrset, 0, 1.0, edecider);
     957                 :         }
     958                 : 
     959                 :         /** Get terms which match a given document, by document id.
     960                 :          *
     961                 :          *  This method returns the terms in the current query which match
     962                 :          *  the given document (end iterator: get_matching_terms_end()).
     963                 :          *
     964                 :          *  It is possible for the document to have been removed from the
     965                 :          *  database between the time it is returned in an MSet, and the
     966                 :          *  time that this call is made.  If possible, you should specify
     967                 :          *  an MSetIterator instead of a Xapian::docid, since this will enable
     968                 :          *  database backends with suitable support to prevent this
     969                 :          *  occurring.
     970                 :          *
     971                 :          *  Note that a query does not need to have been run in order to
     972                 :          *  make this call.
     973                 :          *
     974                 :          *  @param did     The document id for which to retrieve the matching
     975                 :          *                 terms.
     976                 :          *
     977                 :          *  @return        An iterator over the terms which match the
     978                 :          *                 document.  The terms will be returned (as far as this
     979                 :          *                 makes any sense) in the same order as the terms
     980                 :          *                 in the query.  Terms will not occur more than once,
     981                 :          *                 even if they do in the query.
     982                 :          *
     983                 :          *  @exception Xapian::InvalidArgumentError  See class documentation.
     984                 :          *  @exception Xapian::DocNotFoundError      The document specified
     985                 :          *      could not be found in the database.
     986                 :          */
     987                 :         TermIterator get_matching_terms_begin(Xapian::docid did) const;
     988                 : 
     989                 :         /** End iterator for get_matching_terms_begin(); the argument is unused. */
     990                 :         TermIterator get_matching_terms_end(Xapian::docid /*did*/) const {
     991                 :             return TermIterator(NULL);
     992                 :         }
     993                 : 
     994                 :         /** Get terms which match a given document, by match set item.
     995                 :          *
     996                 :          *  This method returns the terms in the current query which match
     997                 :          *  the given document (end iterator: get_matching_terms_end()).
     998                 :          *
     999                 :          *  If the underlying database has suitable support, using this call
    1000                 :          *  (rather than passing a Xapian::docid) will enable the system to
    1001                 :          *  ensure that the correct data is returned, and that the document
    1002                 :          *  has not been deleted or changed since the query was performed.
    1003                 :          *
    1004                 :          *  @param it   The iterator for which to retrieve the matching terms.
    1005                 :          *
    1006                 :          *  @return     An iterator over the terms which match the
    1007                 :          *              document.  The terms will be returned (as far as this
    1008                 :          *              makes any sense) in the same order as the terms
    1009                 :          *              in the query.  Terms will not occur more than once,
    1010                 :          *              even if they do in the query.
    1011                 :          *
    1012                 :          *  @exception Xapian::InvalidArgumentError  See class documentation.
    1013                 :          *  @exception Xapian::DocNotFoundError      The document specified
    1014                 :          *      could not be found in the database.
    1015                 :          */
    1016                 :         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
    1017                 : 
    1018                 :         /** End iterator for get_matching_terms_begin(); the argument is unused. */
    1019                 :         TermIterator get_matching_terms_end(const MSetIterator &/*it*/) const {
    1020                 :             return TermIterator(NULL);
    1021                 :         }
    1022                 : 
    1023                 :         /** Register a MatchDecider.
    1024                 :          *
    1025                 :          * This associates a name with a MatchDecider object.
    1026                 :          *
    1027                 :          * @deprecated This method is deprecated.  It was added long ago with
    1028                 :          * the intention that it would allow the remote backend to support
    1029                 :          * use of MatchDecider objects, but there's a better approach.
    1030                 :          *
    1031                 :          * @param name          The name to register this MatchDecider as.
    1032                 :          * @param mdecider      The MatchDecider.  If omitted (NULL), then remove
    1033                 :          *                      any MatchDecider registered with this name.
    1034                 :          */
    1035                 :         XAPIAN_DEPRECATED(
    1036                 :         void register_match_decider(const std::string &name,
    1037                 :                                     const MatchDecider *mdecider = NULL));
    1038                 : 
    1039                 :         /// Return a string describing this object; callable on const objects.
    1040                 :         std::string get_description() const;
    1041                 : };
    1042                 : 
    1043                 : }
    1044                 : 
    1045                 : class RemoteServer; // Global-scope class befriended by Xapian::Weight
    1046                 : class ScaleWeight; // Global-scope class befriended by Xapian::Weight
    1047                 : 
    1048                 : namespace Xapian {
    1049                 : 
    1050                 : /// Abstract base class for weighting schemes.
    1051                 : class XAPIAN_VISIBILITY_DEFAULT Weight {
    1052                 :     friend class Enquire; // So Enquire can clone us
    1053                 :     friend class ::RemoteServer; // So RemoteServer can clone us - FIXME
    1054                 :     friend class ::ScaleWeight; // Global-scope class; also gets private access
    1055                 :     public:
    1056                 :         class Internal; // Forward declaration only
    1057                 :     protected:
    1058                 :         Weight(const Weight &); // Copy construction: subclasses and friends only
    1059                 :     private:
    1060                 :         void operator=(Weight &); // Private, so outside code cannot assign
    1061                 : 
    1062                 :         /** Return a new weight object of this type.
    1063                 :          *
    1064                 :          * A subclass called FooWeight taking parameters param1 and param2
    1065                 :          * should implement this as:
    1066                 :          *
    1067                 :          * virtual FooWeight * clone() const {
    1068                 :          *     return new FooWeight(param1, param2);
    1069                 :          * }
    1070                 :          */
    1071                 :         virtual Weight * clone() const = 0;
    1072                 : 
    1073                 :     protected:
    1074                 :         const Internal * internal; // Weight::Internal == Stats
    1075                 :         Xapian::doclength querysize; // presumably create()'s querysize_ - confirm
    1076                 :         Xapian::termcount wqf; // presumably create()'s wqf_ - confirm
    1077                 :         std::string tname; // presumably create()'s tname_ - confirm
    1078                 : 
    1079                 :     public:
    1080                 :         // No stats attached yet; zero the scalars so nothing is indeterminate.
    1081                 :         Weight() : internal(NULL), querysize(0), wqf(0) { }
    1082                 :         virtual ~Weight();
    1083                 : 
    1084                 :         /** Create a new weight object of the same type as this and initialise
    1085                 :          *  it with the specified statistics.
    1086                 :          *
    1087                 :          *  You shouldn't call this method yourself - it's called by
    1088                 :          *  Enquire.
    1089                 :          *
    1090                 :          *  @param internal_  Object to ask for collection statistics.
    1091                 :          *  @param querysize_ Query size.
    1092                 :          *  @param wqf_       Within query frequency of term this object is
    1093                 :          *                    associated with.
    1094                 :          *  @param tname_     Term which this object is associated with.
    1095                 :          */
    1096                 :         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
    1097                 :                         Xapian::termcount wqf_, const std::string & tname_) const;
    1098                 : 
    1099                 :         /** Name of the weighting scheme.
    1100                 :          *
    1101                 :          *  If the subclass is called FooWeight, this should return "Foo".
    1102                 :          */
    1103                 :         virtual std::string name() const = 0;
    1104                 : 
    1105                 :         /// Serialise object parameters into a string.
    1106                 :         virtual std::string serialise() const = 0;
    1107                 : 
    1108                 :         /// Create object given string serialisation returned by serialise().
    1109                 :         virtual Weight * unserialise(const std::string &s) const = 0;
    1110                 : 
    1111                 :         /** Get a weight which is part of the sum over terms being performed.
    1112                 :          *  This returns a weight for a given term and document.  These
    1113                 :          *  weights are summed to give a total weight for the document.
    1114                 :          *
    1115                 :          *  @param wdf the within document frequency of the term.
    1116                 :          *  @param len the (unnormalised) document length.
    1117                 :          */
    1118                 :         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
    1119                 :                                       Xapian::doclength len) const = 0;
    1120                 : 
    1121                 :         /** Gets the maximum value that get_sumpart() may return.  This
    1122                 :          *  is used in optimising searches, by having the postlist tree
    1123                 :          *  decay appropriately when parts of it can have limited, or no,
    1124                 :          *  further effect.
    1125                 :          */
    1126                 :         virtual Xapian::weight get_maxpart() const = 0;
    1127                 : 
    1128                 :         /** Get an extra weight for a document to add to the sum calculated
    1129                 :          *  over the query terms.
    1130                 :          *  This returns a weight for a given document, and is used by some
    1131                 :          *  weighting schemes to account for influence such as document
    1132                 :          *  length.
    1133                 :          *
    1134                 :          *  @param len the (unnormalised) document length.
    1135                 :          */
    1136                 :         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
    1137                 : 
    1138                 :         /** Gets the maximum value that get_sumextra() may return.  This
    1139                 :          *  is used in optimising searches.
    1140                 :          */
    1141                 :         virtual Xapian::weight get_maxextra() const = 0;
    1142                 : 
    1143                 :         /// Return false if the weight object doesn't need doclength.
    1144                 :         virtual bool get_sumpart_needs_doclength() const; /* { return true; } */
    1145                 : };
    1146                 : 
    1147                 : /// Boolean weighting scheme (everything gets 0)
    1148                 : class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
    1149                 :     public:
    1150                 :         BoolWeight * clone() const; // Covariant override of Weight::clone()
    1151                 :         BoolWeight() { } // This class declares no data members to set up
    1152                 :         ~BoolWeight();
    1153                 :         std::string name() const;
    1154                 :         std::string serialise() const;
    1155                 :         BoolWeight * unserialise(const std::string & s) const;
    1156                 :         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
    1157                 :         Xapian::weight get_maxpart() const;
    1158                 : 
    1159                 :         Xapian::weight get_sumextra(Xapian::doclength len) const;
    1160                 :         Xapian::weight get_maxextra() const;
    1161                 : 
    1162                 :         bool get_sumpart_needs_doclength() const; // Overrides the Weight version
    1163                 : };
    1164                 : 
    1165                 : /** BM25 weighting scheme
    1166                 :  *
    1167                 :  * The BM25 formula is \f[
    1168                 :  *      \frac{k_{2}.n_{q}}{1+L_{d}}+\sum_{t}\frac{(k_{3}+1)q_{t}}{k_{3}+q_{t}}.\frac{(k_{1}+1)f_{t,d}}{k_{1}((1-b)+bL_{d})+f_{t,d}}.w_{t}
    1169                 :  * \f] where
    1170                 :  *   - \f$w_{t}\f$ is the termweight of term t
    1171                 :  *   - \f$f_{t,d}\f$ is the within document frequency of term t in document d
    1172                 :  *   - \f$q_{t}\f$ is the within query frequency of term t
    1173                 :  *   - \f$L_{d}\f$ is the normalised length of document d
    1174                 :  *   - \f$n_{q}\f$ is the size of the query
    1175                 :  *   - \f$k_{1}\f$, \f$k_{2}\f$, \f$k_{3}\f$ and \f$b\f$ are user specified parameters
    1176                 :  */
    1177                 : class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
    1178                 :     private:
    1179                 :         mutable Xapian::weight termweight; // mutable: writable from const methods
    1180                 :         mutable Xapian::doclength lenpart;
    1181                 : 
    1182                 :         double k1, k2, k3, b; // Formula parameters, as described above
    1183                 :         Xapian::doclength min_normlen; // Lower cutoff for normalised length
    1184                 : 
    1185                 :         mutable bool weight_calculated; // Starts false in both constructors
    1186                 : 
    1187                 :         void calc_termweight() const;
    1188                 : 
    1189                 :     public:
    1190                 :         /** Construct a BM25 weight.
    1191                 :          * Negative k1, k2 or k3 are treated as 0, and b is clamped to [0, 1].
    1192                 :          * @param k1 governs the importance of within document frequency.
    1193                 :          *                Must be >= 0.  0 means ignore wdf.  Default is 1.
    1194                 :          * @param k2 compensation factor for the high wdf values in
    1195                 :          *                large documents.  Must be >= 0.  0 means no
    1196                 :          *                compensation.  Default is 0.
    1197                 :          * @param k3 governs the importance of within query frequency.
    1198                 :          *                Must be >= 0.  0 means ignore wqf.  Default is 1.
    1199                 :          * @param b Relative importance of within document frequency and
    1200                 :          *                document length.  Must be >= 0 and <= 1.  Default
    1201                 :          *                is 0.5.
    1202                 :          * @param min_normlen specifies a cutoff on the minimum value that
    1203                 :          *                can be used for a normalised document length -
    1204                 :          *                smaller values will be forced up to this cutoff.
    1205                 :          *                This prevents very small documents getting a huge
    1206                 :          *                bonus weight.  Default is 0.5.
    1207                 :          */
    1208                 :         BM25Weight(double k1_, double k2_, double k3_, double b_,
    1209                 :                    double min_normlen_)
    1210                 :                 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
    1211                 :                   weight_calculated(false)
    1212                 :         {
    1213                 :             if (k1 < 0) k1 = 0;
    1214                 :             if (k2 < 0) k2 = 0;
    1215                 :             if (k3 < 0) k3 = 0;
    1216                 :             if (b < 0) b = 0; else if (b > 1) b = 1;
    1217                 :         }
    1218                 :         BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
    1219                 :                        weight_calculated(false) { }
    1220                 : 
    1221                 :         BM25Weight * clone() const;
    1222                 :         ~BM25Weight() { }
    1223                 :         std::string name() const;
    1224                 :         std::string serialise() const;
    1225                 :         BM25Weight * unserialise(const std::string & s) const;
    1226                 :         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
    1227                 :         Xapian::weight get_maxpart() const;
    1228                 : 
    1229                 :         Xapian::weight get_sumextra(Xapian::doclength len) const;
    1230                 :         Xapian::weight get_maxextra() const;
    1231                 : 
    1232                 :         bool get_sumpart_needs_doclength() const;
    1233                 : };
    1234                 : 
    1235                 : /** Traditional probabilistic weighting scheme.
    1236                 :  *
    1237                 :  * This class implements the Traditional Probabilistic Weighting scheme, as
    1238                 :  * described by the early papers on Probabilistic Retrieval.  BM25 generally
    1239                 :  * gives better results.
    1240                 :  *
    1241                 :  * The Traditional weighting scheme formula is \f[
    1242                 :  *      \sum_{t}\frac{f_{t,d}}{k.L_{d}+f_{t,d}}.w_{t}
    1243                 :  * \f] where
    1244                 :  *   - \f$w_{t}\f$ is the termweight of term t
    1245                 :  *   - \f$f_{t,d}\f$ is the within document frequency of term t in document d
    1246                 :  *   - \f$L_{d}\f$ is the normalised length of document d
    1247                 :  *   - \f$k\f$ is a user specifiable parameter
    1248                 :  *
    1249                 :  * TradWeight(k) is equivalent to BM25Weight(k, 0, 0, 1, 0), except that
    1250                 :  * the latter returns weights (k+1) times larger.
    1251                 :  */
    1252                 : class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
    1253                 :     private:
    1254                 :         mutable Xapian::weight termweight; // mutable: writable from const methods
    1255                 :         mutable Xapian::doclength lenpart;
    1256                 : 
    1257                 :         double param_k; // The formula's k; never negative after construction
    1258                 : 
    1259                 :         mutable bool weight_calculated; // Starts false in both constructors
    1260                 : 
    1261                 :         void calc_termweight() const;
    1262                 : 
    1263                 :     public:
    1264                 :         /** Construct a TradWeight.
    1265                 :          * A negative k is treated as 0.
    1266                 :          * @param k  parameter governing the importance of within
    1267                 :          *           document frequency and document length - any non-negative
    1268                 :          *           number (0 meaning to ignore wdf and doc length when
    1269                 :          *           calculating weights).  Default is 1.
    1270                 :          */
    1271                 :         explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
    1272                 :             if (param_k < 0) param_k = 0;
    1273                 :         }
    1274                 : 
    1275                 :         TradWeight() : param_k(1.0), weight_calculated(false) { }
    1276                 : 
    1277                 :         TradWeight * clone() const;
    1278                 :         ~TradWeight() { }
    1279                 :         std::string name() const;
    1280                 :         std::string serialise() const;
    1281                 :         TradWeight * unserialise(const std::string & s) const;
    1282                 : 
    1283                 :         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
    1284                 :         Xapian::weight get_maxpart() const;
    1285                 : 
    1286                 :         Xapian::weight get_sumextra(Xapian::doclength len) const;
    1287                 :         Xapian::weight get_maxextra() const;
    1288                 : 
    1289                 :         bool get_sumpart_needs_doclength() const;
    1290                 : };
    1291                 : 
    1292                 : }
    1293                 : 
    1294                 : #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Generated by: LTP GCOV extension version 1.6