casacore
MultiFileBase.h
Go to the documentation of this file.
1 //# MultiFileBase.h: Abstract base class to combine multiple files in a single one
2 //# Copyright (C) 2014
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
27 
28 #ifndef CASA_MULTIFILEBASE_H
29 #define CASA_MULTIFILEBASE_H
30 
31 //# Includes
32 #include <casacore/casa/aips.h>
33 #include <casacore/casa/IO/ByteIO.h>
34 #include <casacore/casa/BasicSL/String.h>
35 #include <casacore/casa/Utilities/CountedPtr.h>
36 #include <casacore/casa/vector.h>
37 #include <casacore/casa/ostream.h>
38 
39 
40 namespace casacore { //# NAMESPACE CASACORE - BEGIN
41 
42  //# Forward declaration.
43  class AipsIO;
44  class HDF5Group;
45  class HDF5DataSet;
46 
47 
48  // <summary>
49  // Helper class for MultiFileBase containing info per internal file
50  // </summary>
51  // <use visibility=local>
52  struct MultiFileInfo {
53  explicit MultiFileInfo (Int64 bufSize=0);
54  vector<Int64> blockNrs; // physical blocknrs for this logical file
55  vector<char> buffer; // buffer holding a data block
56  Int64 curBlock; // the data block held in buffer (<0 is none)
57  Int64 fsize; // file size (in bytes)
58  String name; // the virtual file name
59  Bool dirty; // has data in buffer been changed?
62  };
63  void operator<< (ostream&, const MultiFileInfo&);
64  void operator<< (AipsIO&, const MultiFileInfo&);
66 
67 
68  // <summary>
69  // Abstract base class to combine multiple files in a single one.
70  // </summary>
71 
72  // <use visibility=export>
73 
74  // <reviewed reviewer="" date="" tests="tMultiFile" demos="">
75  // </reviewed>
76 
77  // <synopsis>
78  // This class is a container file holding multiple virtual files. It is
79  // primarily meant as a container file for the storage manager files of a
80  // table to reduce the number of files used (especially for Lustre) and to
81  // reduce the number of open files (especially when concatenating tables).
82  // <br>A secondary goal is offering the ability to use an IO buffer size
83  // that matches the file system well (large buffer size for e.g. ZFS).
84  //
85  // The SetupNewTable constructor has a StorageOption argument to define
86  // if a MultiFile has to be used and if so, the buffer size to use.
87  // It is also possible to specify that through aipsrc variables.
88  //
89  // A virtual file is spread over multiple (fixed size) data blocks in the
90  // MultiFile. A data block is never shared by multiple files.
91  // For each virtual file MultiFile keeps a MultiFileInfo object telling
91  // the file size and the block numbers used for the file. When flushing
93  // the MultiFile, this meta info is written into a header block and,
94  // if needed, continuation blocks. On open and resync, it is read back.
95  // <br>
96  //
97  // A virtual file is represented by an MFFileIO object, which is derived
98  // from ByteIO and as such part of the casacore IO framework. It makes it
99  // possible for applications to access a virtual file in the same way as
100  // a regular file.
101  //
102  // It is possible to delete a virtual file. Its blocks will be added to
103  // the free block list (which is also stored in the meta info).
104  // </synopsis>
105 
106  // <example>
107  // In principle it is possible to use the MultiFile functions directly.
108  // However, in general it is much easier to use an MFFileIO object
109  // per virtual file as shown below.
110  // <srcblock>
111  // // Create a new MultiFile using a block size of 1 MB.
112  // MultiFile mfile("file.mf", ByteIO::New, 1048576);
113  // // Create a virtual file in it.
114  // MFFileIO mf1(mfile, "mf1", ByteIO::New);
115  // // Use it (for example) as the sink of AipsIO.
116  // AipsIO stream (&mf1);
117  // // Write values.
118  // stream << (Int)10;
119  // stream << True;
120  // // Seek to beginning of file and read data in.
121  // stream.setpos (0);
122  // Int vali;
123  // Bool valb;
124  // stream >> vali >> valb;
125  // </srcblock>
126  // </example>
127 
128  // <todo>
129  // <li> write headers at alternating file positions (for robustness)
130  // <li> possibly write headers entirely at the end if larger than blocksize
131  // </todo>
132 
133 
135  {
136  public:
137  // Open or create a MultiFileBase with the given name.
138  // Upon creation the block size can be given. If 0, it uses the block size
139  // of the file system the file is on.
140  MultiFileBase (const String& name, Int blockSize=0);
141 
142  // The destructor flushes and closes the file.
143  virtual ~MultiFileBase();
144 
145  // Return the file id of a file in the MultiFileBase object.
146  // If the name is unknown, an exception is thrown if throwExcp is set.
147  // Otherwise it returns -1.
148  Int fileId (const String& name, Bool throwExcp=True) const;
149 
150  // Add a file to the MultiFileBase object. It returns the file id.
151  // Only the base name of the given file name is used. In this way the
152  // MultiFileBase container file can be moved.
153  Int addFile (const String& name);
154 
155  // Delete a file. It adds its blocks to the free block list.
156  void deleteFile (Int fileId);
157 
158  // Read a block at the given offset. It returns the actual size read.
159  Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset);
160 
161  // Write a block at the given offset. It returns the actual size written.
162  Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset);
163 
164  // Flush the file by writing all dirty data and all header info.
165  void flush();
166 
167  // Resync with another process by clearing the buffers and rereading
168  // the header. The header is only read if its counter has changed.
169  void resync();
170 
171  // Reopen the underlying file for read/write access.
172  // Nothing will be done if the file is writable already.
173  // Otherwise it will be reopened and an exception will be thrown
174  // if it is not possible to reopen it for read/write access.
175  virtual void reopenRW() = 0;
176 
177  // Fsync the file (i.e., force the data to be physically written).
178  virtual void fsync() = 0;
179 
180  // Get the file name of the MultiFileBase.
181  String fileName() const
182  { return itsName; }
183 
184  // Is the file writable?
185  Bool isWritable() const
186  { return itsWritable; }
187 
188  // Get the block size used.
189  Int64 blockSize() const
190  { return itsBlockSize; }
191 
192  // Get the nr of virtual files.
193  uInt nfile() const;
194 
195  // Get the total nr of data blocks used.
196  Int64 size() const
197  { return itsNrBlock; }
198 
199  // Get the info object (for test purposes mainly).
200  const vector<MultiFileInfo>& info() const
201  { return itsInfo; }
202 
203  // Get the free blocks (for test purposes mainly).
204  const vector<Int64>& freeBlocks() const
205  { return itsFreeBlocks; }
206 
207  private:
209  {
210  writeBlock (info, info.curBlock, &(info.buffer[0]));
211  info.dirty = False;
212  }
213 
214  // Do the class-specific actions on adding a file.
215  virtual void doAddFile (MultiFileInfo&) = 0;
216  // Do the class-specific actions on deleting a file.
217  virtual void doDeleteFile (MultiFileInfo&) = 0;
218  // Flush the file itself.
219  virtual void flushFile() = 0;
220  // Flush and close the file.
221  virtual void close() = 0;
222  // Write the header info.
223  virtual void writeHeader() = 0;
224  // Read the header info. If always==False, the info is only read if the
225  // header counter has changed.
226  virtual void readHeader (Bool always=True) = 0;
227  // Extend the virtual file to fit lastblk.
228  virtual void extend (MultiFileInfo& info, Int64 lastblk) = 0;
229  // Write a data block.
230  virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
231  const void* buffer) = 0;
232  // Read a data block.
233  virtual void readBlock (MultiFileInfo& info, Int64 blknr,
234  void* buffer) = 0;
235 
236  protected:
237  // Set the flags and blockSize for a new MultiFile/HDF5.
238  void setNewFile();
239 
240  //# Data members
242  Int64 itsBlockSize; // The blocksize used
243  Int64 itsNrBlock; // The total nr of blocks actually used
244  Int64 itsHdrCounter; // Counter of header changes
245  vector<MultiFileInfo> itsInfo;
246  Bool itsWritable; // Is the file writable?
247  Bool itsChanged; // Has header info changed since last flush?
248  vector<Int64> itsFreeBlocks;
249  };
250 
251 
252 } //# NAMESPACE CASACORE - END
253 
254 #endif
MultiFileInfo(Int64 bufSize=0)
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition: aipsxtype.h:38
int Int
Definition: aipstype.h:50
Abstract base class to combine multiple files in a single one.
AipsIO is the object persistency mechanism of Casacore.
Definition: AipsIO.h:168
CountedPtr< HDF5DataSet > dataSet
Definition: MultiFileBase.h:61
Helper class for MultiFileBase containing info per internal file.
Definition: MultiFileBase.h:52
ostream & operator<<(ostream &os, const IComplex &)
Show on ostream.
String fileName() const
Get the file name of the MultiFileBase.
vector< char > buffer
Definition: MultiFileBase.h:55
vector< Int64 > blockNrs
Definition: MultiFileBase.h:54
const vector< Int64 > & freeBlocks() const
Get the free blocks (for test purposes mainly).
Referenced counted pointer for constant data.
Definition: CountedPtr.h:80
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool False
Definition: aipstype.h:44
CountedPtr< HDF5Group > group
Definition: MultiFileBase.h:60
Int64 blockSize() const
Get the block size used.
const vector< MultiFileInfo > & info() const
Get the info object (for test purposes mainly).
String: the storage and methods of handling collections of characters.
Definition: String.h:223
vector< Int64 > itsFreeBlocks
AipsIO & operator>>(AipsIO &os, Record &rec)
Definition: Record.h:465
vector< MultiFileInfo > itsInfo
void writeDirty(MultiFileInfo &info)
Bool isWritable() const
Is the file writable?
const Bool True
Definition: aipstype.h:43
this file contains all the compiler specific defines
Definition: mainpage.dox:28
Int64 size() const
Get the total nr of data blocks used.
unsigned int uInt
Definition: aipstype.h:51