Xpace
eval.h
Go to the documentation of this file.
1 
2 /**************************************************************
3  **
4  ** @file index/eval.h
5  **
6  ** Copyright (C) 2012 Xpace, LLC. All rights reserved
7  **
8  ** www.xpace.net
9  **
10  **************************************************************/
11 
12 #ifndef XPACE_EVAL_H
13 #define XPACE_EVAL_H
14 
15 #include <functional>
16 
17 #include <QtCore/qregexp.h>
18 
19 #include "base/types.h"
20 #include "base/exception.h"
21 #include "index/searchnames.h"
22 
23 namespace Xpace
24 {
25  // thrown when an index can't be opened
27  {
28  public:
30  (String indexName);
31  };
32 
33  // thrown when a regex search is requested of an index that does not support them
35  {
36  public:
38  (String indexName);
39  };
40 
41  // thrown when an invalid regex is passed
43  {
44  public:
46  (String regex);
47  };
48 
49  // ================================ FIND ====================
50 
#ifdef QDOM_H
/**
 * Looks up the term carried by a query DOM node.
 *
 * Only compiled when QDOM_H is defined (presumably the include guard of
 * Qt's qdom.h - confirm against the Qt version in use).
 *
 * The term text is the node value of the first child of @p termNode when it
 * has children, otherwise the node value of @p termNode itself.
 *
 * @param index     index to search; must provide begin(term, found)
 * @param termNode  query node holding the term
 * @param found     optional out-flag, forwarded unchanged to index.begin();
 *                  may be null
 * @return whatever index.begin(term, found) returns
 */
template <typename INDEX>
typename INDEX::Iter find_term
    (const INDEX& index,
     QDomNode termNode,
     bool* found = nullptr)
{
    // the text lives on the first child when there is one, else on the node itself
    QDomNode tn(termNode.hasChildNodes() ? termNode.firstChild() : termNode);
    String val(tn.nodeValue());

    return index.begin(val, found);
}
#endif
64 
/**
 * Looks up a term in an index by value.
 *
 * Thin forwarding wrapper around INDEX::begin(val, found).
 *
 * @param index  index to search; must provide begin(val, found)
 * @param val    term to look for, passed through as-is
 * @param found  optional out-flag, forwarded unchanged to index.begin();
 *               may be null (the default)
 * @return whatever index.begin(val, found) returns
 */
template <typename INDEX, typename STR>
typename INDEX::Iter find_term
    (const INDEX& index,
     const STR& val,
     bool* found = nullptr)
{
    return index.begin(val, found);
}
73 
74  // ================================ RANGE ===================
75 
/**
 * Builds the reference list for the index positions between two iterators.
 *
 * Delegates entirely to begin.getRefList(end); the result is converted to
 * REFLISTCURSOR on return.
 *
 * @param begin  iterator the range starts at
 * @param end    iterator handed to getRefList() as the range limit
 * @return the value of begin.getRefList(end)
 */
template <typename REFLISTCURSOR, typename INDEX_ITER>
auto eval_range(const INDEX_ITER& begin, const INDEX_ITER& end) -> REFLISTCURSOR
{
    return begin.getRefList(end);
}
83 
84  #ifdef QDOM_H
85  // ================================ REGEX ===================
86 
87  template <typename REFLISTCURSOR, typename INDEX>
88  REFLISTCURSOR eval_regex(const INDEX& index, QDomNode node)
89  {
90  QDomNode tn(node.hasChildNodes() ? node.firstChild() : node);
91  QString val(tn.nodeValue());
92 
93  static QString regex_chars("*?+(){}|.^$\\{}");
94 
95  uint literal_chars(0);
96  bool use_regex(false);
97 
98  auto ch(val.begin());
99  if (ch == val.end())
100  return REFLISTCURSOR();
101 
102  if (*ch == '^')
103  ++ch;
104 
105  for ( ; ch < val.end(); ++ch)
106  {
107  int i(regex_chars.indexOf(*ch));
108  if (i == -1)
109  ++literal_chars;
110  else
111  {
112  use_regex = true;
113  break;
114  }
115  }
116 
117  typename INDEX::Iter it, end;
118  if (use_regex)
119  {
120  it = index.begin();
121  end = index.end();
122 
123  QRegExp regex(val);
124  if (!regex.isValid())
125  throw evalInvalidRegex(val);
126 
127  REFLISTCURSOR result;
128  for ( ; it.diff(end); it.move(1))
129  if (regex.exactMatch(it.getTerm()))
130  {
131  REFLISTCURSOR refs(it.getRefList());
132  result.opOr(&refs);
133  }
134  return result;
135  }
136 
137  // no regex - just a truncation
138  QString start_term(val);
139  start_term.truncate(literal_chars);
140  REVEAL_STRING(sstr, start_term);
141  it = index.begin(start_term);
142  QString end_term(index.next(start_term));
143  REVEAL_STRING(estr, end_term);
144  end = index.begin(end_term);
145 
146  return it.getRefList(end);
147  }
148 
149  // ================================ EVAL ====================
150 
// Declaration only; the definition is not part of this header.
// Takes the query as a String and returns a QDomNode.
// NOTE(review): presumably this produces the DOM tree that eval() walks - confirm against the implementation.
151  QDomNode parseQuery
152  (String query);
153 
// Recursively evaluates a parsed query tree into a reference-list cursor.
//
// Template parameters:
//   REFLISTCURSOR - result type; this code relies on default construction,
//                   opNot(recCount), opOr(ptr), operator! and the static
//                   opAnd(vector*)
//   INDEXLIST     - opaque collection, only handed on to getIndex
//   INDEX         - index type produced by getIndex
//
// Parameters:
//   indexList - passed through to getIndex and to the recursive calls
//   node      - node to evaluate; a null node yields a default-constructed cursor
//   recCount  - forwarded to opNot() for NOT and NE
//               (presumably the total record count - TODO confirm)
//   depth     - incremented on every recursive call; not otherwise read in the
//               code visible here
//   getIndex  - callback resolving the index for a node; reports the index
//               name through its String* out-parameter
//   isAnd     - true: node and all of its following siblings are evaluated and
//               AND-ed together; false (default): only node is evaluated
//               (plus the paired LE sibling when a GE/LE range is detected)
//
// Throws evalCantOpenIndex when the resolved index tests false, and
// evalCantRegex when a regex is requested of an index whose regexOK() is false.
//
// NOTE(review): this listing appears to have lost source lines - the embedded
// numbering skips 172-174, 207 and 219 - so a few conditions below are
// incomplete as shown.
154  template <typename REFLISTCURSOR, typename INDEXLIST, typename INDEX>
155  REFLISTCURSOR eval
156  (const INDEXLIST& indexList,
157  QDomNode node,
158  uint64 recCount,
159  uint depth,
160  std::function<const INDEX(const INDEXLIST& indexList,
161  const QDomNode& node,
162  String* indexName)> getIndex,
163  bool isAnd = false)
164  {
165  if (node.isNull())
166  return REFLISTCURSOR();
167 
168  QString name(node.nodeName());
169  REVEAL_STRING(n, name);
170 
// wrapper tags are transparent: evaluation simply descends into the first child
// NOTE(review): the rest of this condition (listing lines 172-174) is missing here
171  if ((name == SearchNames::DOCUMENT_TAG) ||
175  return eval<REFLISTCURSOR, INDEXLIST, INDEX>(indexList, node.firstChild(), recCount, depth + 1, getIndex);
176 
// one entry per evaluated sibling; combined by opAnd() at the end
177  std::vector<REFLISTCURSOR> to_and;
178 
179  do // for each sibling
180  {
181  REFLISTCURSOR result; // this will be the result for this sibling
182  bool skip = false;
183 
184  if (name == SearchNames::OP_NOT)
185  {
// evaluate the first child, then negate via opNot(recCount)
186  result = eval<REFLISTCURSOR, INDEXLIST, INDEX>(indexList, node.firstChild(), recCount, depth + 1, getIndex);
187  result.opNot(recCount);
188  } // not
189 
190  else if (name == SearchNames::OP_OR)
191  {
// each child is evaluated on its own (isAnd left at its default) and OR-ed into result
192  QDomNode child(node.firstChild());
193  result = eval<REFLISTCURSOR, INDEXLIST, INDEX>(indexList, child, recCount, depth + 1, getIndex);
194  while (!(child = child.nextSibling()).isNull())
195  {
196  REFLISTCURSOR sibling(eval<REFLISTCURSOR, INDEXLIST, INDEX>(indexList, child, recCount, depth + 1, getIndex));
197  result.opOr(&sibling);
198  }
199  } // or
200 
201  else
202  {
// every remaining operator works on one index, resolved by the caller-supplied callback
203  String index_name;
204  const INDEX& index(getIndex(indexList, node, &index_name));
205 
// NOTE(review): the condition opening the regex branch (listing line 207) is
// missing here; presumably a test for the regex operator - confirm.
// NOTE(review): unlike the other branches, regexOK() is called without a
// preceding !index check.
206  #ifdef QREGEXP_H
208  {
209  if (!index.regexOK())
210  throw evalCantRegex(index_name);
211  result = eval_regex<REFLISTCURSOR, INDEX>(index, node);
212  } // regex
213  else
214  #endif
215 
// NOTE(review): the last operand of this condition (listing line 219) is missing here
216  if ((name == SearchNames::OP_EQ) ||
217  (name == SearchNames::OP_NE) ||
218  (name == SearchNames::OP_TERM) ||
220  {
221  if (!index)
222  throw evalCantOpenIndex(index_name);
223 
// found is only assigned through find_term()'s out-parameter
224  bool found;
225  typename INDEX::Iter it(find_term(index, node, &found));
226  result = (found) ? it.getRefList() : REFLISTCURSOR();
227 
// NE is evaluated as EQ followed by negation
228  if (name == SearchNames::OP_NE)
229  result.opNot(recCount);
230 
231  } // eq, term, phrase
232 
233  else if (name == SearchNames::OP_LT)
234  {
235  if (!index)
236  throw evalCantOpenIndex(index_name);
237 
// range from the start of the index up to the position find_term() returns
238  bool found;
239  typename INDEX::Iter end(find_term(index, node.firstChild(), &found));
240  result = eval_range<REFLISTCURSOR>(index.begin(), end);
241  } // lt
242 
243  else if (name == SearchNames::OP_GT)
244  {
245  if (!index)
246  throw evalCantOpenIndex(index_name);
247 
// when the term itself was found, step past it first; if move(1) reports
// false, result is left default-constructed
248  bool found;
249  typename INDEX::Iter begin(find_term(index, node.firstChild(), &found));
250  if (!found || begin.move(1))
251  result = eval_range<REFLISTCURSOR>(begin, index.end());
252  } // gt
253 
254  else if (name == SearchNames::OP_LE)
255  {
256  if (!index)
257  throw evalCantOpenIndex(index_name);
258 
// when the term itself was found, move the end past it so it is included
259  bool found;
260  typename INDEX::Iter end(find_term(index, node.firstChild(), &found));
261  if (found)
262  end.move(1);
263  result = eval_range<REFLISTCURSOR>(index.begin(), end);
264  } // le
265 
266  else if (name == SearchNames::OP_GE)
267  {
268  if (!index)
269  throw evalCantOpenIndex(index_name);
270 
// A GE immediately followed by a final LE sibling carrying the same index
// attribute is folded into a single begin..end range.
271  // see if this is really a range search (TODO move this into a general search planner)
272  QDomNode next(node.nextSibling());
273  if (!next.isNull() && (next.nodeName() == SearchNames::OP_LE) && next.nextSibling().isNull() &&
274  (node.toElement().attribute(SearchNames::INDEX_TAG) == next.toElement().attribute(SearchNames::INDEX_TAG)))
275  {
276  // it is a range search
277  bool found;
278  typename INDEX::Iter begin(find_term(index, node.firstChild(), &found));
279  typename INDEX::Iter end(find_term(index, next.firstChild(), &found));
280  if (found)
281  end.move(1);
282  result = eval_range<REFLISTCURSOR>(begin, end);
// consume the LE sibling so the sibling loop does not evaluate it again
283  node = next;
284  }
285 
286  else
287  {
288  // not a range search
289  bool found;
290  typename INDEX::Iter begin(find_term(index, node.firstChild(), &found));
291  result = eval_range<REFLISTCURSOR>(begin, index.end());
292  }
293  } // ge
294 
295  else if (name == SearchNames::OP_ALL)
296  {
297  if (!index)
298  throw evalCantOpenIndex(index_name);
299 
// NOTE(review): this returns straight out of the sibling loop, so anything
// already collected in to_and is not applied - confirm this is intended.
300  // TODO: optimize all search
301  return eval_range<REFLISTCURSOR>(index.begin(), index.end());
302  } // all
303 
304  else if (name == SearchNames::OP_AND)
// explicit AND: evaluate all children as one AND-ed sibling chain (isAnd = true)
305  result = eval<REFLISTCURSOR, INDEXLIST, INDEX>(indexList, node.firstChild(), recCount, depth + 1, getIndex, true);
306 
307  else
308  // skip unrecognized branch
309  skip = true;
310  }
311 
312  if (!skip)
313  {
// a result for which operator! is true ends the evaluation with a default-constructed cursor
314  if (!!result)
315  to_and.push_back(result);
316  else
317  // and-ing with 0
318  return REFLISTCURSOR();
319  }
320 
// advance to the next sibling; stop when there is none
321  if ((node = node.nextSibling()).isNull())
322  break;
323  name = node.nodeName();
324  }
325  while (isAnd); // default search is And; if not, just return
326 
327  return REFLISTCURSOR::opAnd(&to_and);
328  };
329  #endif
330 }
331 
332 #endif
static const char * OP_PHRASE
Definition: searchnames.h:35
A low-level data holder.
Definition: types_c.h:82
static const char * OP_AND
Definition: searchnames.h:31
static const char * OP_TERM
Definition: searchnames.h:34
unsigned int uint
Definition: types.h:75
static const char * OP_REGEX
Definition: searchnames.h:42
static const char * INDEX_TAG
Definition: searchnames.h:26
static const char * OP_LT
Definition: searchnames.h:37
INDEX::Iter find_term(const INDEX &index, const STR &val, bool *found=0)
Definition: eval.h:67
static const char * QUERY_TAG
Definition: searchnames.h:21
static const char * OP_NOT
Definition: searchnames.h:29
static const char * OP_OR
Definition: searchnames.h:30
A string, Unicode UTF-16 and reference-counted.
Definition: types.h:269
static const char * OP_NE
Definition: searchnames.h:36
static const char * SEARCH_TAG
Definition: searchnames.h:22
static const char * OP_ALL
Definition: searchnames.h:44
unsigned long long uint64
Definition: types.h:87
static const char * OP_GE
Definition: searchnames.h:40
REFLISTCURSOR eval_range(const INDEX_ITER &begin, const INDEX_ITER &end)
Definition: eval.h:78
static const char * OP_EQ
Definition: searchnames.h:33
const Xpace_Char16 * name
Sink callbacks for table data.
Definition: table_c.h:141
static const char * OP_GT
Definition: searchnames.h:38
Xpace_StoreAccess Xpace_Table * result
Definition: table_c.h:42
static const char * OP_LE
Definition: searchnames.h:39
static const char * DOCUMENT_TAG
Definition: searchnames.h:20
Xpace project main namespace
Definition: datetime.h:18
static const char * PARSE_TREE_TAG
Definition: searchnames.h:23

current as of Wed Jun 10 2026 12:00:05