Xpace
file.h
Go to the documentation of this file.
1 
2 /**********************************************************//**
3  **
4  ** @file util/file.h
5  **
6  ** Copyright (C) 2016 Xpace, LLC. All rights reserved
7  **
8  ** www.xpace.net
9  **
10  **************************************************************/
11 
12 
13 #if !defined(XPACE_FILE_H)
14 #define XPACE_FILE_H
15 
16 #if !defined NOMINMAX
17 #define NOMINMAX
18 #endif
19 
20 #if _WIN32 || _WIN64
21 // for HANDLE
22 # include <windows.h>
23 # pragma warning(push)
24 # pragma warning(disable : 4251)
25 #else
26 # include <cstdio>
27 #endif
28 #include <algorithm>
29 #include <cstring>
30 #include <cassert>
31 
32 #include "base/types.h"
33 #include "base/exception.h"
34 
35 namespace Xpace
36 {
38  {
39  public:
40  typedef uint64 Position;
41  typedef int64 Distance;
42  static const Position errorPosition;
43 
44  /// constructor - Create an empty file
45  File
46  ();
47 
48  /// creates an independent copy
49  /// rhs must be read-only; result is read-only
50  /// @param rhs copy this
51  File
52  (const File& rhs);
53 
54  /// assign an independent copy
55  /// rhs must be read-only; result is read-only
56  /// @param rhs copy this
57  File& operator=
58  (const File& rhs);
59 
60  /// thrown if either of the above two fails
61  /// e.g., if rhs is not read-only
62  class Cant_Copy : public Exception
63  {
64  public:
65  Cant_Copy
66  (const File& f) :
67  Exception("Can't copy file \"%1\".")
68  {
69  addParam(f.getName());
70  }
71  };
72 
73  /// destructor - closes if open
74  virtual ~File
75  ();
76 
/// How a file is to be opened: for reading or writing, whether it may be
/// created, and the access pattern that is expected to dominate.
enum OpenMode
{
  ReadSequential,     ///< existing file, (primarily) sequential reading
  ReadRandom,         ///< existing file, (primarily) random reading
  WriteSequential,    ///< existing or new file, (primarily) sequential writing
  WriteSequentialNew, ///< newly created file, (primarily) sequential writing
  WriteRandom,        ///< existing or new file, (primarily) random writing
  WriteRandomNew      ///< newly created file, (primarily) random writing
};
86 
87  enum Type
88  {
89  Raw, ///< an untyped file from an external source
90  Any, ///< match any type (except raw)
95  IndexMap
96  };
97 
98  /// @param name full path name
99  /// @param mode file open mode
100  /// @param type the file's type; an attempt to open an existing file of the wrong type will fail
101  /// @param bufSize if 0, use defaults
102  /// @return true iff successful
103  virtual bool open
104  (const String name,
105  OpenMode mode,
106  Type type = Any,
107  size_t bufSize = 0);
108 
109  /// Open a special file, which is not
110  /// seekable, such as a pipe
111  /// @param handle OS handle
112  /// @param bufSize if 0, use defaults
113  virtual void fromHandle
114 #if _WIN32
115  (HANDLE handle,
116 #else
117  (FILE* handle,
118 #endif
119  size_t bufSize = 0);
120 
121  /// @return true iff the file is not open
122  virtual bool operator!
123  ()
124  const;
125 
126  /// @return the file's name
127  virtual String getName
128  ()
129  const;
130 
131 // /// @return the file's Type
132 // virtual Type getType
133 // ()
134 // const;
135 
136  /// @return the file's internal buffer size
137  virtual size_t getBufSize
138  ()
139  const;
140 
141  /// Make sure all changes are written
142  virtual bool flush
143  ();
144 
145  /// Close the file, flushing if necessary
146  virtual bool close
147  ();
148 
149  /// delete the file, if it exists
150  /// file does not have to be open
151  /// @return true iff file existed and successfully deleted
152  virtual bool remove
153  ();
154 
155  /// Read from the file
156  /// @param dest destination buffer
157  /// @param bytes number of bytes requested
158  /// @return actual bytes read; read will stop at end-of-file
159  size_t read
160  (void* dest,
161  size_t bytes);
162 
163  /// Read a T from the file
164  /// @param dest fillin destination T
165  /// @return true iff successful
166  template <typename T>
167  bool read
168  (T* dest);
169 
170  /// Read a T from the file
171  /// @return the T
172  /// @throw File_Cant_Read iff unsuccessful
173  template <typename T>
174  T read
175  ();
176 
177  /// Read the remainder of the file as a 16-bit char string
178  /// @param raw read remainder of file as one string,
179  /// otherwise read string written with writeString
180  /// @param maxLen read no more than this many chars
181  /// @return read String
183  (bool raw = false,
184  size_t maxLen = 0x100000);
185 
186  /// Read the remainder of the file as an 8-bit char string
187  /// @param maxLen read no more than this many chars
188  /// @return read String
189  String readString8
190  (size_t maxLen = 0x100000);
191 
192  /// Write some data to the file
193  /// @param src source buffer
194  /// @param bytes number of bytes requested
195  /// @return number of bytes written
196  size_t write
197  (const void* src,
198  size_t bytes);
199 
200  /// Write a T to the file
201  /// @param src source T
202  /// @return true if successful
203  template <typename T>
204  bool write
205  (const T& src);
206 
207  /// Write a String to the file
208  /// @param str String to be written
209  /// @param raw if false, write for readString(), if true just write chars
210  /// @return true if successful
211  bool writeString
212  (const String& str,
213  bool raw = false);
214 
215  /// Get by direct access to the file buffer
216  /// not guaranteed to be valid after successive calls
217  /// @param dest ptr to data file buffer
218  /// @param count number of Ts requested
219  /// @return actual Ts available
220  template <typename T>
221  size_t get
222  (T** dest,
223  size_t bytes);
224 
225  /// Get bytes until a delimiter
226  /// @param dest ptr to data file buffer
227  /// @param delim the delimiter
228  /// @return bytes between dest and the delimiter
229  template <typename T>
230  size_t getUntil
231  (T** dest,
232  T delim);
233 
234  /// Get a T by direct access to the file buffer
235  /// return not guaranteed to be valid after successive calls
236  /// @return ptr to T on success, 0 if fail
237  template <typename T>
238  T* get
239  ();
240 
241  /// Get file length
242  /// @return offset of first byte past end-of-file
243  virtual Position getLength
244  ()
245  const;
246 
247  /// Get length of a file by name
248  /// @param name full path name
249  /// @return file length; or errorPosition if file doesn't exist
250  virtual Position getLength
251  (const String name);
252 
253  /// Get current position
254  /// @return current offset in file; -1 if file not open
255  virtual Position getPos
256  ()
257  const;
258 
259  /// Move current position
260  /// @param pos seek to this position
261  /// @return true if successful
262  virtual bool seek
263  (Position pos);
264 
265  /// Move current position
266  /// @param dist move this amount
267  /// @return true if successful
268  bool seekRel
269  (Distance dist);
270 
271  /// @return how much can be read before a physical read
272  size_t bufRemains
273  ()
274  const;
275 
276  /// copy from this file to another
277  /// @param targ copy to this file
278  /// @param size copy this many bytes
279  /// @return true if successful
280  bool copy
281  (File* targ,
282  File::Position size);
283 
284  // describes a piece of a file
285  struct Segment
286  {
287  Segment(File* file,
288  Position offset,
289  Position size) :
290  file(file),
291  offset(offset),
292  size(size)
293  {
294  }
296  const Position offset; ///< start offset of this ref list
297  const Position size; ///< length of this ref list
298  };
299 
300  protected:
301  // defaults
302  enum { SEQ_BUF_SIZE = 65536, RAND_BUF_SIZE = 16384 };
303 
306 
307  Type type; ///< the file's type
308  bool writable; ///< opened for write
309  bool bufDirty; ///< is buffer modified
310  bool seekable; ///< is not a special file
311  uint64 bufOffset; ///< offset of buffer in file
312  size_t bufSize; ///< actual buffer size
313  byte* buffer; ///< buffer
314  byte* current; ///< current position in buffer
315  byte* end; ///< end of data in buffer
316 
317  virtual bool seekFile
318  (uint64);
319  virtual size_t fillBuffer
320  ();
321 
322  // low-level implementations
323  // called only when buffer exhausted
324  virtual size_t fileRead
325  (byte* dest,
326  size_t bytes);
327  virtual size_t fileWrite
328  (const byte* src,
329  size_t bytes);
330 
331  #if _WIN32 || _WIN64
332  HANDLE hd;
333  #else
334  FILE* hd;
335  #endif
336 
337  enum { OS_BUF_SIZE = 4096 };
338 
339  private:
340  std::vector<byte> g_buffer; ///< buffer for put()/get()
341  std::vector<byte> temp;
342  };
343 
344  class File_Cant_Open : public Exception
345  {
346  public :
348  (String fname,
349  File::OpenMode mode) :
350  Exception("Can't open file \"%1\" for %2.")
351  {
352  addParam(fname);
353  switch (mode)
354  {
355  case File::ReadSequential :
356  addParam("sequential read");
357  break;
358  case File::ReadRandom :
359  addParam("random read");
360  break;
361  case File::WriteSequential :
362  addParam("sequential write");
363  break;
365  addParam("sequential write new)");
366  break;
367  case File::WriteRandom :
368  addParam("random write");
369  break;
370  case File::WriteRandomNew :
371  addParam("random write (new)");
372  break;
373  }
374  }
375  };
376 
377  class File_Cant_Read : public Exception
378  {
379  public :
381  (const File& f) :
382  Exception("Can't read file \"%1\" at position \"%2\".")
383  {
384  addParam(f.getName());
385  addParam(String().setNum(f.getPos()));
386  }
387  };
388 
389  class File_Cant_Write : public Exception
390  {
391  public :
393  (const File& f) :
394  Exception("Can't write file \"%1\" at position \"%2\".")
395  {
396  addParam(f.getName());
397  addParam(String().setNum(f.getPos()));
398  }
399  };
400 
401  class File_Corrupt : public Exception
402  {
403  public :
405  (const File& f) :
406  Exception("File \"%1\" corrupt at position \"%2\".")
407  {
408  addParam(f.getName());
409  addParam(String().setNum(f.getPos()));
410  }
411  };
412 
413  class File_Cant_Close : public Exception
414  {
415  public :
417  (const File& f) :
418  Exception("Can't close file \"%1\".")
419  {
420  addParam(f.getName());
421  }
422  };
423 
424  // ============================================================
425  // ============================================================
426  // ============================================================
427 
428  inline
429  bool File::operator!
430  ()
431  const
432  {
433  #if defined _WIN32
434  return (hd == INVALID_HANDLE_VALUE);
435  #else
436  return (hd == 0);
437  #endif
438  }
439 
440  inline
442  ()
443  const
444  {
445  return name;
446  }
447 
448  inline
449  size_t File::getBufSize
450  ()
451  const
452  {
453  return bufSize;
454  }
455 
456  inline
457  size_t File::read
458  (void* dest,
459  size_t bytes)
460  {
461  size_t bytes_to_read = std::min(static_cast<size_t>(end - current), bytes);
462  memcpy(dest, current, bytes_to_read);
463  if (bytes_to_read == bytes)
464  {
465  current += bytes;
466  return bytes;
467  }
468  return bytes_to_read
469  + fileRead(reinterpret_cast<byte*>(dest) + bytes_to_read,
470  bytes - bytes_to_read);
471  }
472 
473  template <typename T>
474  inline
475  bool File::read
476  (T* dest)
477  {
478  return (read(reinterpret_cast<byte*>(dest), sizeof(T)) == sizeof(T));
479  }
480 
481  template <typename T>
482  inline
483  T File::read
484  ()
485  {
486  T t;
487  if (!read(&t))
488  throw File_Cant_Read(*this);
489  return t;
490  }
491 
492  template <typename T>
493  inline
494  size_t File::get
495  (T** dest,
496  size_t count)
497  {
498  size_t bytes(count * sizeof(T));
499  size_t bytes_to_get = std::min(static_cast<size_t>(end - current), bytes);
500  if (bytes_to_get == bytes)
501  {
502  *dest = reinterpret_cast<T*>(current);
503  current += bytes;
504  return count;
505  }
506 
507  if ((current == end) && (bytes <= bufSize))
508  {
509  if (end != buffer)
510  bufOffset += bufSize;
511  *dest = reinterpret_cast<T*>(buffer);
512  size_t ret(std::min(bytes, fillBuffer()));
513  current = buffer + ret;
514  return ret / sizeof(T);
515  }
516 
517  if (bytes > g_buffer.size())
518  g_buffer.resize(bytes);
519 
520  byte* d;
521  bytes = read(d = &g_buffer[0], bytes);
522  *dest = reinterpret_cast<T*>(d);
523  return bytes / sizeof(T);
524  }
525 
526  // @todo: worry about Ts that straddle buffers
527  template <typename T>
528  inline
529  size_t File::getUntil
530  (T** dest,
531  T delim)
532  {
533  byte* start(current);
534  T* t(reinterpret_cast<T*>(current));
535 
536  temp.clear();
537 
538  while (1)
539  {
540  if (t >= reinterpret_cast<T*>(end))
541  {
542  uint copy_size(end - start);
543  if (copy_size)
544  {
545  temp.resize(copy_size);
546  memcpy(&temp[0], start, copy_size);
547  }
548  else
549  start = buffer;
550 
551  bufOffset += end - buffer;
552  if (!fillBuffer())
553  {
554  *dest = 0;
555  return 0;
556  }
557  t = reinterpret_cast<T*>(buffer);
558  }
559 
560  if (*t == delim)
561  break;
562  ++t;
563  }
564 
565  current = reinterpret_cast<byte*>(t);
566 
567  size_t ret;
568  if (!temp.empty())
569  {
570  ret = temp.size() + current - buffer;
571  if (ret > g_buffer.size())
572  g_buffer.resize(ret);
573  memcpy(&g_buffer[0], &temp[0], temp.size());
574  if (current - buffer)
575  memcpy(&g_buffer[temp.size()], buffer, current - buffer);
576  *dest = reinterpret_cast<T*>(&g_buffer[0]);
577  }
578  else
579  {
580  *dest = reinterpret_cast<T*>(start);
581  ret = current - start;
582  }
583 
584  // advance past delim
585  current += sizeof(T);
586 
587  return ret / sizeof(T);
588  }
589 
590  template <typename T>
591  inline
592  T* File::get
593  ()
594  {
595  T* t;
596  return (get(reinterpret_cast<byte**>(&t), sizeof(T)) == sizeof(T))
597  ? t : 0;
598  }
599 
600  inline
601  size_t File::write
602  (const void* src,
603  size_t bytes)
604  {
605  if (!writable)
606  return 0;
607 
608  size_t bytes_to_copy = std::min(bufSize - static_cast<size_t>(current - buffer), bytes);
609  memcpy(current, src, bytes_to_copy);
610  bufDirty = true;
611 
612  if (bytes_to_copy == bytes)
613  current += bytes;
614  else
615  bytes = bytes_to_copy
616  + fileWrite(reinterpret_cast<const byte*>(src) + bytes_to_copy,
617  bytes - bytes_to_copy);
618 
619  end = std::max(end, current);
620 
621  return bytes;
622  }
623 
624  template <typename T>
625  inline
626  bool File::write
627  (const T& src)
628  {
629  return (write(&src, sizeof(T)) == sizeof(T));
630  }
631 
632  inline
633  bool File::seekRel
634  (Distance dist)
635  {
636  if (dist < 0)
637  {
638  if (static_cast<int>(buffer - current) <= dist)
639  {
640  current += dist;
641  return true;
642  }
643  }
644  else
645  {
646  if (end - current >= dist)
647  {
648  current += dist;
649  return true;
650  }
651  }
652  return seek(getPos() + dist);
653  }
654 
655  inline
656  size_t File::bufRemains
657  ()
658  const
659  {
660  return end - current;
661  }
662 
663 } // namespace
664 
665 #if _WIN32 || _WIN64
666 # pragma warning(pop)
667 #endif
668 
669 #endif
670 
const Xpace_Char16 Xpace_Data_Type type
Definition: table_c.h:141
size_t bufSize
actual buffer size
Definition: file.h:312
A low-level data holder.
Definition: types_c.h:82
create for (primarily) random writing
Definition: file.h:84
size_t write(const void *src, size_t bytes)
Write some data to the file.
Definition: file.h:602
byte * current
current position in buffer
Definition: file.h:314
an untyped file from an external source
Definition: file.h:89
unsigned int uint
Definition: types.h:75
Type type
the file&#39;s type
Definition: file.h:307
create for (primarily) sequential writing
Definition: file.h:82
FILE * hd
Definition: file.h:334
A string, Unicode UTF-16 and reference-counted.
Definition: types.h:269
void readString(MemBitStream *b, CH *ch, size_t length)
Definition: compress.h:203
int64 Distance
Definition: file.h:41
const Position offset
start offset of this ref list
Definition: file.h:296
virtual size_t getBufSize() const
Definition: file.h:450
bool seekRel(Distance dist)
Move current position.
Definition: file.h:634
virtual Position getPos() const
Get current position.
unsigned long long uint64
Definition: types.h:87
String name
Definition: file.h:304
bool writeString(const CH *str, size_t length, MemBitStream *b)
Definition: compress.h:259
uint64 Position
Definition: file.h:40
match any type (except raw)
Definition: file.h:90
uint64 bufOffset
offset of buffer in file
Definition: file.h:311
bool bufDirty
is buffer modified
Definition: file.h:309
size_t bufRemains() const
Definition: file.h:657
const Xpace_Char16 * name
Sink callbacks for table data.
Definition: table_c.h:141
T read()
Read a T from the file.
Definition: file.h:484
open existing for (primarily) random reading
Definition: file.h:80
bool seekable
is not a special file
Definition: file.h:310
bool writable
opened for write
Definition: file.h:308
OpenMode
Definition: file.h:77
T * get()
Get a T by direct access to the file buffer return not guaranteed to be valid after successive calls...
Definition: file.h:593
byte * buffer
buffer
Definition: file.h:313
long long int64
Definition: types.h:86
OpenMode mode
Definition: file.h:305
thrown if either of the above two fails e.g., if rhs is not read-only
Definition: file.h:62
byte * end
end of data in buffer
Definition: file.h:315
Segment(File *file, Position offset, Position size)
Definition: file.h:287
static const Position errorPosition
Definition: file.h:42
const Position size
length of this ref list
Definition: file.h:297
open existing or new for (primarily) sequential writing
Definition: file.h:81
virtual String getName() const
Definition: file.h:442
Xpace project main namespace
Definition: datetime.h:18
open existing or new for (primarily) random writing
Definition: file.h:83
open existing for (primarily) sequential reading
Definition: file.h:79
uchar byte
Definition: types.h:74
size_t getUntil(T **dest, T delim)
Get bytes until a delimiter.
Definition: file.h:530

current as of Wed Jun 10 2026 12:00:05