Xpace
tablesinktostore.h
Go to the documentation of this file.
1 
2 
3 /**************************************************************
4  **
5  ** @file data/store/tablesinktostore.h
6  **
7  ** Copyright (C) 2012 Xpace, LLC. All rights reserved
8  **
9  ** www.xpace.net
10  **
11  **************************************************************/
12 
13 #include "data/store/table.h"
14 #include "data/store/store.h"
15 #include "data/store/classtree.h"
16 
17 namespace Xpace
18 {
19  // ================================ CONVERT TABLE -> STORE ==
20 
22  {
23  public:
24  static const char* DOC_ID_TAG; ///< tag of the field (column) that contains the document identifier
25  static const char* LOC_TAG; ///< tag of fields (columns) that contain location in the final store
26 
28  (const Configuration& config);
29 
// Name-based add() overload. This sink does not support it: the body asserts
// unconditionally and, if the assert does not abort, returns ~0.
// NOTE(review): the listing jumps from line 31 to line 34, so part of the
// parameter list is not shown here.
30  virtual uint add
31  (const String& name,
34  bool* added = 0) override
35  {
36  assert(0); // this overload must never be called
37  return ~0;
38  }
39 
// add() overload taking a Configuration, a column index and a bool* `added`.
// Only declared here; the body is not part of this listing.
40  virtual uint add
41  (const Configuration& config,
42  uint col,
43  bool* added) override;
44 
// Begin a document. The uint64 argument is unnamed and not used here.
// Whenever `targ` is null, this builds the Store from class_tree.toString()
// with SA_bulkWrite and caches the store's sink in `targ`.
// Always returns 0.
45  virtual int64 start
46  (uint64) override
47  {
48  if (!targ) // no sink yet: create the store and fetch its sink
49  {
50  store = std::unique_ptr<Store>(new Store(class_tree.toString(), SA_bulkWrite));
51  targ = store->getSink();
52  }
53  return 0;
54  }
55 
// Write an int64 value: forwards to id_set(column, value) and returns its result.
// NOTE(review): listing line 57 (the first parameter, presumably `uint column`)
// is missing from this extract.
56  virtual bool set
58  int64 value) override
59  {
60  return id_set(column, value);
61  }
62 
// Write a DecimalFloat value: forwards to no_id_set(column, value), which
// asserts that the column is not flagged in is_id before writing.
// NOTE(review): listing line 64 (the first parameter, presumably `uint column`)
// is missing from this extract.
63  virtual bool set
65  const DecimalFloat& value) override
66  {
67  return no_id_set(column, value);
68  }
69 
// Write a String8 value: forwards to the String8 overload of id_set and
// returns its result.
// NOTE(review): listing line 71 (the first parameter, presumably `uint column`)
// is missing from this extract.
70  virtual bool set
72  const String8& value) override
73  {
74  return id_set(column, value);
75  }
76 
// Write a String16 value: forwards to no_id_set(column, value), so String16
// values are only accepted in columns not flagged in is_id (see the TODO below).
// NOTE(review): listing line 78 (the first parameter, presumably `uint column`)
// is missing from this extract.
77  virtual bool set
79  const String16& value) override
80  {
81  // TODO: 16-bit IDs
82  return no_id_set(column, value);
83  }
84 
// Write a BytesRef value: forwards to no_id_set(column, value), which asserts
// that the column is not flagged in is_id before writing.
// NOTE(review): listing line 86 (the first parameter, presumably `uint column`)
// is missing from this extract.
85  virtual bool set
87  const BytesRef& value) override
88  {
89  return no_id_set(column, value);
90  }
91 
// Close the sink. Forwards to targ->close() when a store sink exists;
// does nothing while `targ` is null.
92  virtual void close
93  () override
94  {
95  if (targ) // a sink exists only once start() has created the store
96  targ->close();
97  }
98 
// Exception with an empty default constructor (no message, no parameters).
// Thrown by the String8 overload of id_set when the column right after the
// document-id column arrives while the code cursor is not at the root.
99  class NoDocID : public Exception
100  {
101  public:
102  NoDocID
103  ()
104  {
105  }
106  };
107 
// Exception raised for a code that is not found in the current code node.
// Built with the message template "Bad code \"%1\"." and the offending code
// attached through addParam (presumably substituted for %1 - confirm in Exception).
108  class BadCode : public Exception
109  {
110  public:
111  BadCode
112  (const String& code) :
113  Exception("Bad code \"%1\".")
114  {
115  addParam(code);
116  }
117  };
118 
119  private:
120  std::unique_ptr<Store> store;
121  Store::Sink* targ;
122  uint64 targ_doc;
123 
// Per-node payload type used to instantiate ClassTree<ct_node> (class_tree).
124  struct ct_node
125  {
126  bool isLeaf; // NOTE(review): presumably marks leaf nodes of the class tree - confirm
127  };
128  ClassTree<ct_node> class_tree;
129 
// Node of the code lookup tree walked by the String8 overload of id_set.
130  struct code_node
131  {
132  uint child; // value passed to targ->move() when this node is reached
133  std::map<String8, code_node> codes; // next-level nodes, keyed by code string
134  };
135  code_node root_cn, *cur_cn;
136  BytePool code_pool;
137 
138  uint columns;
139 
140  String doc_id_name;
141  std::vector<String> loc_names;
142 
143  uint doc_id_column;
144  std::vector<uint> loc_columns;
145 
146  std::vector<std::vector<uint>> data_columns;
147 
// Function object called by check_doc_id with a document-id value. A true
// result makes check_doc_id move the sink to an empty Store::Location; a false
// result makes it start a new document.
// NOTE(review): the operator bodies are not in this listing; presumably each
// one compares the value with the previously seen id and remembers it - confirm.
148  class equal_doc_id
149  {
150  public:
151  equal_doc_id
152  ();
153  bool operator()
154  (int64 n);
155  bool operator()
156  (const DecimalFloat& n);
157  bool operator()
158  (const String8& str);
159  bool operator()
160  (const String16& str);
161 
162  private:
163  bool first; // NOTE(review): presumably true until the first id has been seen - confirm
164 
165  int64 int_val; // one stored value per supported id type
166  DecimalFloat float_val;
167  String8 string8_val;
168  String16 string16_val;
169  std::vector<byte> bytesref_val;
170 
171  std::vector<utf8_t> string8_buf; // NOTE(review): presumably backing storage for string8_val - confirm
172  std::vector<utf16_t> string16_buf; // NOTE(review): presumably backing storage for string16_val - confirm
173  };
174  equal_doc_id eq_doc_id;
175 
176  std::vector<bool> is_id;
// Declared only; the body is not part of this listing.
// NOTE(review): presumably flags `col` in is_id - confirm against the definition.
177  void id_column
178  (uint col);
179 
// Write value `t` for table column `column` into the store sink.
// data_columns[column] is a vector of uint: front() is compared with the
// front() of the previously written column (held in cur_sibling), and element
// [1] is the location handed to targ->set().
// Returns false if repositioning the sink fails, otherwise the result of targ->set().
180  template <typename T>
181  bool set_targ
182  (uint column,
183  T t)
184  {
185  if (!cur_data_column) // first data write since check_doc_id reset the cursor
186  {
187  cur_data_column = &data_columns[first_data_column];
188  targ->move(0);
189  }
190  uint cur_sibling(cur_data_column->front()); // front() of the column written last
191  cur_data_column = &data_columns[column];
192  assert(cur_data_column->front() >= cur_sibling); // front() values must not go backwards
193  if ((cur_data_column->front() != cur_sibling) && // front() changed: reposition the sink
194  ((!targ->move(-1) || !targ->move(cur_data_column->front())))) // NOTE(review): move(-1) presumably steps up one level - confirm
195  return false;
196 
197  return targ->set((*cur_data_column)[1], t);
198  }
199 
// Write a value to a column that must not be flagged in is_id: asserts
// !is_id[column], then forwards to set_targ and returns its result.
// Note that is_id is indexed without a bounds check here.
200  template <typename T>
201  bool no_id_set
202  (uint column,
203  T t)
204  {
205  assert(!is_id[column]);
206  return set_targ(column, t);
207  }
208 
// Handle a value arriving in the document-id column.
// Resets the data-column cursor and the code-tree cursor, then asks eq_doc_id
// about `t`: on true the sink is moved to an empty Store::Location; on false
// targ->start(targ_doc) is called and its result is converted to bool.
209  template <typename T>
210  bool check_doc_id
211  (T t)
212  {
213  cur_data_column = 0; // makes the next set_targ start from first_data_column
214  cur_cn = &root_cn; // restart code lookup at the root
215  return (eq_doc_id(t)) ? targ->move(Store::Location()) : !!targ->start(targ_doc);
216  }
217 
// int64 write path: a value in the document-id column goes to check_doc_id;
// any other column is written through set_targ.
218  bool id_set
219  (uint column,
220  int64 t)
221  {
222  if (column == doc_id_column)
223  return check_doc_id(t);
224  return set_targ(column, t);
225  }
226 
// String8 write path.
// Columns flagged in is_id are either the document-id column (handed to
// check_doc_id) or a code column: the code is looked up in the current code
// node, the cursor descends to the matching node and the sink is moved to that
// node's `child`. Columns not flagged in is_id are written through set_targ.
// Throws NoDocID if the column right after doc_id_column arrives while the
// code cursor is not at the root.
// Throws BadCode if `t` is not a key of the current node's `codes`.
227  bool id_set
228  (uint column,
229  const String8& t)
230  {
231  if ((column < is_id.size()) && is_id[column]) // bounds-checked lookup in is_id
232  {
233  if (column == doc_id_column)
234  return check_doc_id(t);
235  // move based on code
236  if ((column == doc_id_column + 1) && (cur_cn != &root_cn)) // check_doc_id resets cur_cn to the root
237  throw NoDocID();
238  auto new_cn(cur_cn->codes.find(t));
239  if (new_cn == cur_cn->codes.end())
240  throw BadCode(String(t.data, t.length));
241  cur_cn = &(new_cn->second);
242  return targ->move(cur_cn->child);
243  }
244  return set_targ(column, t);
245  }
246 
247  // status
248  String vector_config;
249  size_t first_data_column;
250  const std::vector<uint>* cur_data_column;
251  };
252 }
virtual int64 start(uint64 docNum)
Begin a new document/record/row, committing current one (if any)
Definition: data.h:282
uint * columns
Definition: table_c.h:340
const Xpace_Char16 Xpace_Data_Type type
Definition: table_c.h:141
virtual RET set(LOC location, int64 value)
Write an int64.
Definition: data.h:357
A write-only, forward (by document) cursor into a Store.
Definition: store.h:79
A low-level data holder.
Definition: types_c.h:82
static const char * LOC_TAG
tag of fields (columns) that contain location in the final store
unsigned int uint
Definition: types.h:75
Each high-level Xpace object has a Configuration.
Definition: config.h:29
const Xpace_Char16 Xpace_Data_Type uint bool * added
Definition: table_c.h:141
A string, Unicode UTF-16 and reference-counted.
Definition: types.h:269
A low-level const data holder.
Definition: types.h:165
A floating-point number with explicit mantissa and decimals TODO: normalize.
Definition: decimalfloat.h:33
const Xpace_Char16 Xpace_Data_Type uint columnNum
Definition: table_c.h:141
static const char * DOC_ID_TAG
tag of the field (column) that contains the document identifier
A write-only, forward cursor into a table.
Definition: table.h:142
unsigned long long uint64
Definition: types.h:87
size_t length
Definition: types.h:207
uint col
Definition: table_c.h:69
const Xpace_Char16 * name
Sink callbacks for table data.
Definition: table_c.h:141
uint int64 value
Definition: table_c.h:159
uint column
Add a 64-bit int to the row.
Definition: table_c.h:108
long long int64
Definition: types.h:86
virtual RET move(LOC location)
Move without setting data.
Definition: data.h:346
Xpace project main namespace
Definition: datetime.h:18
std::vector< uint > Location
A location within a source.
Definition: store.h:64
A Store is a hierarchically organized dataset, suitable for managing structured text and multidimensi...
Definition: store.h:26

current as of Wed Jun 10 2026 12:00:05