Xpace
index.h
Go to the documentation of this file.
1 
2 /*************************************************************
3  **
4  ** @file index/index.h
5  **
6  ** Copyright (C) 2012 Xpace, LLC. All rights reserved
7  **
8  ** www.xpace.net
9  **
10  **************************************************************/
11 
12 #ifndef XPACE_INDEX_H
13 #define XPACE_INDEX_H
14 
15 #include "base/types.h"
16 #include "base/sharedimpl.h"
17 #include "base/config.h"
18 #include "util/file.h"
19 #include "index/reflist/reflist.h"
20 
21 namespace Xpace
22 {
23  /// An index
25  {
26  public :
27  enum { MAX_TERM_SIZE = 8192 };
28 
29  static const char* FIELD_TAG; ///< Configuration tag for an indexed field
30  static const char* PARSED_TAG; ///< Configuration tag for a parsed index
31 
32  /// create an empty index
33  /// @param config use this configuration
34  explicit Index
35  (const Configuration& config);
36 
37  /// read an index
38  /// @param config use this configuration
39  /// @param indexFile read from this file
40  /// @param oRoot file position of index root
41  /// @param refCount number of refs in the index's universe
42  Index
43  (const Configuration& config,
44  File* indexFile,
45  File::Position oRoot,
46  uint64 refCount = 0);
47 
48  /// @return true iff the index is empty
49  bool operator!
50  ()
51  const;
52 
53  /// @return true iff the index has term counts
54  bool hasCounts
55  ()
56  const;
57 
58  /// @return Index's name
59  String getName
60  ()
61  const;
62 
63  /// @return Index's configuration
64  String getConfig
65  ()
66  const;
67 
68  /// status callback for makeSorted(), which sorts this index and could take a while;
69  /// operator() returns true to continue the sort, false to stop it
70  struct sortStatus
71  {
72  virtual ~sortStatus
73  ()
74  {
75  };
76 
78  () :
79  interval(0)
80  {
81  };
82 
83  /// @return true to continue, false to stop
84  virtual bool operator()
85  ()
86  {
87  return true;
88  };
89  uint64 interval; ///< call operator() this often
90  };
91  /// @param st status callback
92  bool makeSorted
93  (sortStatus* st = 0);
94 
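95  /// status callback for write(), which writes this index and could take a while;
96  /// operator() returns an interval (in bytes) to continue, or 0 to stop the write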
97  struct WriteStatus
98  {
99  virtual ~WriteStatus
100  ()
101  {
102  };
103 
104  /// @param term the term being written
105  /// @param offset file offset to which we're writing
106  /// @return interval (in bytes) to continue, 0 to stop
107  virtual uint operator()
108  (uint64 termNum,
109  String term,
110  File::Position offset)
111  = 0;
112  };
113  /// @param file write to this file (at current position)
114  /// @param st status callback
115  /// @return file position of index root
116  File::Position write
117  (File* file,
118  WriteStatus* st = 0);
119 
120  /// merge a list of indexes
121  /// @param file merge to this file
122  /// @param list merge these indexes
123  /// @return file position of index root
124  static
125  File::Position merge
126  (File* file,
127  std::vector<Index>& list);
128 
129  /// index term iterator
131  {
132  public :
133  /// @retval count the count of docs containing the term (if index has counts)
134  /// @return a string representation (from a Collator) of this index term
135  String getTerm
136  (uint64* count = 0)
137  const;
138 
139  /// @return the list of references for this index term
140  RefListCursor getRefList
141  ()
142  const;
143 
144  /// @return the list of all references for all index terms in the interval [*this, end)
145  /// @param end (one past) the end of the interval
146  RefListCursor getRefList
147  (const Iter& end)
148  const;
149 
150  /// @return true if the iterator is past the end of the index's term list
151  bool operator!
152  ()
153  const;
154 
155  /// @param delta move the iterator this far in the index list
156  /// @param relative if true move from current position, else move from start
157  /// @return true if move successful, false (and don't move) if not
158  bool move
159  (int64 delta,
160  bool relative = true);
161  /// the distance between two iterators
162  /// @param rhs the other iterator
163  /// @return the distance
164  int64 diff
165  (const Iter& rhs)
166  const;
167 
168  /// @return the position of the iterator
169  uint64 getPos
170  ()
171  const;
172 
173  private :
175  friend class Index;
176  friend class RLC_Index;
177  };
178 
179  /// @return the number of terms in the index
180  uint64 getTermCount
181  ()
182  const;
183 
184  /// @return Iter corresponding to the first term in the Index
185  Iter begin
186  ()
187  const;
188 
189  /// find a key in the index
190  /// @param key find this String
191  /// @param found fillin true if key is found
192  /// @return Iter corresponding to the key, next Iter if not found
193  Iter begin
194  (const String& key,
195  bool* found = 0)
196  const;
197 
198  /// @param key find this int64
199  /// @retval found fillin true if key is found
200  /// @return Iter corresponding to the key, next Iter if not found
201  Iter begin
202  (int64 key,
203  bool* found = 0)
204  const;
205 
206  /// @param key find this Key
207  /// @retval found fillin true if key is found
208  /// @return Iter corresponding to the key, next Iter if not found
209  Iter begin
210  (const BytesRef& key,
211  bool* found = 0)
212  const;
213 
214  /// search for a key, adding if it's not there
215 
216  /// @param key find/add this String
217  /// @param added fillin true if key was added
218  /// @return Iter corresponding to the key
219  Iter beginAdd
220  (const String& key,
221  bool* added);
222 
223  /// @param key find/add this int64
224  /// @param added fillin true if key was added
225  /// @return Iter corresponding to the key
226  Iter beginAdd
227  (int64 key,
228  bool* added);
229 
230  /// @param key find/add this Key
231  /// @param added fillin true if key was added
232  /// @return Iter corresponding to the key
233  Iter beginAdd
234  (const BytesRef& key,
235  bool* added);
236 
237  /// @return the iter one past the end
238  Iter end
239  ()
240  const;
241 
242  /// @return true iff term can match a regular expression (next() must return something meaningful)
243  bool regexOK
244  ()
245  const;
246 
247  /// @param key an index term
248  /// @return the next possible term of the same length after this one
249  String next
250  (const String& key)
251  const;
252 
253  #ifdef XPACE_COMPARATOR_H
254  std::unique_ptr<Comparator> getComparator
255  ()
256  const;
257  #endif
258 
259  /// traverse each term, or each reference, in an interval of terms
260  class Sink
261  {
262  public:
263  virtual ~Sink
264  ()
265  {
266  }
267 
268  /// called for each term
269  /// @param key a string representation (from a Collator) of this index term
270  /// @param count the count (if stored in the Index) of references to the key
271  /// @return true to continue, false to stop
272  virtual bool term
273  (const String& key,
274  uint64 count)
275  = 0;
276  /// called for each ref
277  /// @param ref the ref
278  /// @return true to continue, false to stop
279  virtual bool ref
280  (const Ref<uint64>& /*ref*/)
281  {
282  return true;
283  }
284  };
285 
286  /// traverse each term in the range [begin, end)
287  /// @param begin start here
288  /// @param end end here
290  /// @param sink call this for each term
290  /// @param refList if != 0, call act only for terms in this refList
291  /// @return true if completed, false if stopped by callback
292  bool forEachTerm
293  (Iter& begin,
294  Iter& end,
295  Sink* sink,
296  const RefListCursor* refList = 0)
297  const;
298  /// @param start start at the start'th term
299  /// @param count for this many terms
300  bool forEachTerm
301  (uint64 start,
302  uint64 count,
303  Sink* sink,
304  const RefListCursor* refList = 0)
305  const;
306  bool forEachTerm
307  (Sink* sink,
308  const RefListCursor* refList = 0)
309  const
310  {
311  return forEachTerm(0, ~uint64(0), sink, refList);
312  }
313 
314  /// traverse each ref in each term in the range [begin, end)
315  /// @param begin start here
316  /// @param end end here
317  /// @param act call this for each term, and each ref
318  /// @return true if completed, false if stopped by callback
319  bool forEachRef
320  (Iter& begin,
321  Iter& end,
322  Sink* act)
323  const;
324  /// @param start start at the start'th term
325  /// @param count for this many terms
326  /// @return true if completed, false if stopped by callback
327  bool forEachRef
328  (uint64 start,
329  uint64 count,
330  Sink* act)
331  const;
332  bool forEachRef
333  (Sink* act)
334  const
335  {
336  return forEachRef(0, ~uint64(0), act);
337  }
338 
339  #ifdef XPACE_RESULT_H
340  /// Get a range of terms
341  /// @param begin starting at this term
342  /// @param count (at most) this many terms
343  /// @param includeCounts if there are counts in the index, include them in the result
344  /// @return the result, filled with the terms
345  TableResult getTerms
346  (uint64 begin,
347  uint64 count,
348  bool includeCounts)
349  const;
350  #endif
351 
352  #ifdef XPACE_REFLIST_H
353  /// @param query a query string
354  /// @param recCount for NOT searches
355  /// @return a RefListCursor into the result
356  RefListCursor eval
357  (String query,
358  uint64 recCount = 0)
359  const;
360 
361  #ifdef QDOM_H
362  /// @param query a search tree
363  /// @param recCount for NOT searches
364  /// @return a RefListCursor into the result
365  RefListCursor eval
366  (QDomNode query,
367  uint64 recCount = 0)
368  const;
369  #endif
370  #endif
371 
372  private :
373  File::Position write_temp
374  (File*,
375  WriteStatus*);
376 
377  DECLARE_IMPL(Index)
378  };
379 
380  /// Parameters for sorting
382  {
383  const Index* index; ///< Sort using this index
384 
385  bool ascend; ///< Sort ascending if true, descending if false
386  bool keepDuplicates; ///< If true, repeat recurring terms (single sort only)
387 
388  /// What to do with unindexed terms
390  {
391  unindexedDrop = 0, ///< drop them
392  unindexedfirst = 1, ///< return them first
393  unindexedLast = 2 ///< return them last
394  };
396 
398  (const Index* index,
399  bool ascend = true,
400  bool keepDuplicates = true,
401  unindexedAct unindexed = unindexedDrop);
402  };
403 };
404 
405 #endif
static const char * PARSED_TAG
Configuration tag for a parsed index.
Definition: index.h:30
A low-level data holder.
Definition: types_c.h:82
traverse each term, or each reference, in an interval of terms
Definition: index.h:260
unsigned int uint
Definition: types.h:75
sort this index; could take a while status callback
Definition: index.h:70
Each high-level Xpace object has a Configuration.
Definition: config.h:29
const Xpace_Char16 Xpace_Data_Type uint bool * added
Definition: table_c.h:141
A string, Unicode UTF-16 and reference-counted.
Definition: types.h:269
Copyright (C) 2012 Xpace, LLC.
uint64 interval
call operator() this often
Definition: index.h:88
A low-level const data holder.
Definition: types.h:165
Copyright (C) 2016 Xpace, LLC.
#define DECLARE_IMPL(className)
Definition: sharedimpl.h:305
unsigned long long uint64
Definition: types.h:87
index term iterator
Definition: index.h:130
uint64 Position
Definition: file.h:40
unindexedAct
What to do with unindexed terms.
Definition: index.h:389
static const char * FIELD_TAG
Configuration tag for an indexed field.
Definition: index.h:29
Copyright (C) 2012 Xpace, LLC.
const Index * index
Sort using this index.
Definition: index.h:383
Parameters for sorting.
Definition: index.h:381
write this index; could take a while status callback
Definition: index.h:97
unindexedAct unindexed
Definition: index.h:395
long long int64
Definition: types.h:86
uint uint64 Xpace_Table_Sink * sink
Definition: table_c.h:340
A cursor into a list of references - e.g., a search result A single reference is an array of uint64s...
Definition: reflist.h:26
Xpace project main namespace
Definition: datetime.h:18
bool keepDuplicates
If true, repeat recurring terms (single sort only)
Definition: index.h:386
An index.
Definition: index.h:24
bool ascend
Sort ascending if true, descending if false.
Definition: index.h:385

current as of Wed Jun 10 2026 12:00:05