1 : /*
2 : * Merge different vocabularies together and create the tag and facet indexes
3 : *
4 : * Copyright (C) 2003-2007 Enrico Zini <enrico@debian.org>
5 : *
6 : * This program is free software; you can redistribute it and/or modify
7 : * it under the terms of the GNU General Public License as published by
8 : * the Free Software Foundation; either version 2 of the License, or
9 : * (at your option) any later version.
10 : *
11 : * This program is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU General Public License
17 : * along with this program; if not, write to the Free Software
18 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 : */
20 :
21 : #include <tagcoll/diskindex/mmap.h>
22 : #include <tagcoll/input/base.h>
23 : #include <string>
24 : #include <map>
25 : #include <set>
26 :
27 : #ifndef EPT_DEBTAGS_VOCABULARYMERGER_H
28 : #define EPT_DEBTAGS_VOCABULARYMERGER_H
29 :
30 : namespace ept {
31 : namespace debtags {
32 :
33 : class VocabularyMerger
34 7 : {
35 : protected:
36 : class FacetIndexer : public tagcoll::diskindex::MMapIndexer
37 : {
38 : protected:
39 : VocabularyMerger& vm;
40 : public:
41 7 : FacetIndexer(VocabularyMerger& vm) : vm(vm) {}
42 7 : virtual ~FacetIndexer() {}
43 : virtual int encodedSize() const;
44 : virtual void encode(char* buf) const;
45 : };
46 : class TagIndexer : public tagcoll::diskindex::MMapIndexer
47 : {
48 : protected:
49 : VocabularyMerger& vm;
50 : public:
51 7 : TagIndexer(VocabularyMerger& vm) : vm(vm) {}
52 7 : virtual ~TagIndexer() {}
53 : virtual int encodedSize() const;
54 : virtual void encode(char* buf) const;
55 : };
56 : class TagData : public std::map<std::string, std::string>
57 8715 : {
58 : public:
59 : std::string name;
60 : // Offset in the last written file (used for indexing)
61 : long ofs;
62 : int len;
63 : int id;
64 :
65 1245 : TagData() : ofs(0), len(0) {}
66 : };
67 : class FacetData : public std::map<std::string, std::string>
68 434 : {
69 : public:
70 : std::string name;
71 : std::map<std::string, TagData> tags;
72 : // Offset in the last written file (used for indexing)
73 : long ofs;
74 : int len;
75 : int id;
76 :
77 62 : FacetData() : ofs(0), len(0) {}
78 :
79 : TagData& obtainTag(const std::string& fullname);
80 : };
81 : std::map<std::string, FacetData> facets;
82 : int tagCount;
83 : FacetIndexer findexer;
84 : TagIndexer tindexer;
85 :
86 : FacetData& obtainFacet(const std::string& name);
87 : TagData& obtainTag(const std::string& fullname);
88 :
89 : public:
90 7 : VocabularyMerger() : tagCount(0), findexer(*this), tindexer(*this) {}
91 :
92 : /**
93 : * Check if there is any data in the merged vocabulary
94 : */
95 3 : bool empty() const { return facets.empty(); }
96 :
97 : /**
98 : * Parse and import the vocabulary from `input', merging the data with the
99 : * previously imported ones
100 : */
101 : void read(tagcoll::input::Input& input);
102 :
103 : /**
104 : * Write the vocabulary data to the given file
105 : */
106 : void write(const std::string& fname);
107 :
108 : /**
109 : * Write the vocabulary data to the given output stream
110 : */
111 : void write(FILE* out);
112 :
113 : /**
114 : * Get the facet indexer.
115 : *
116 : * Note: the indexers will only be functional after one of the write
117 : * methods have been invoked
118 : */
119 7 : const tagcoll::diskindex::MMapIndexer& facetIndexer() const { return findexer; }
120 :
121 : /**
122 : * Get the tag indexer.
123 : *
124 : * Note: the indexers will only be functional after one of the write
125 : * methods have been invoked
126 : */
127 7 : const tagcoll::diskindex::MMapIndexer& tagIndexer() const { return tindexer; }
128 :
129 : /**
130 : * Check if the vocabulary contains the facet `name'
131 : */
132 : bool hasFacet(const std::string& name) const
133 : {
134 : return facets.find(name) != facets.end();
135 : }
136 :
137 : /**
138 : * Check if the vocabulary contains the tag `fullname'
139 : */
140 : bool hasTag(const std::string& fullname) const;
141 :
142 : /**
143 : * Return the ID for the given tag (or -1 if not found)
144 : */
145 : int tagID(const std::string& fullname) const;
146 :
147 : /**
148 : * Return a set with all tag names
149 : */
150 : std::set<std::string> tagNames() const;
151 : };
152 :
153 : }
154 : }
155 :
156 : // vim:set ts=4 sw=4:
157 : #endif
|