casacore
Loading...
Searching...
No Matches
ISMBucket.h
Go to the documentation of this file.
1//# ISMBucket.h: A bucket in the Incremental Storage Manager
2//# Copyright (C) 1996,1999,2000,2001
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: aips2-request@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25//#
26//# $Id$
27
28#ifndef TABLES_ISMBUCKET_H
29#define TABLES_ISMBUCKET_H
30
31//# Includes
32#include <casacore/casa/aips.h>
33#include <casacore/casa/Containers/Block.h>
34#include <casacore/casa/BasicSL/String.h>
35#include <casacore/casa/iosfwd.h>
36
37namespace casacore { //# NAMESPACE CASACORE - BEGIN
38
39//# Forward declarations
40class ISMBase;
41
42// <summary>
43// A bucket in the Incremental Storage Manager
44// </summary>
45
46// <use visibility=local>
47
48// <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="">
49// </reviewed>
50
51// <prerequisite>
52//# Classes you should understand before using this one.
53// <li> <linkto class=IncrementalStMan>IncrementalStMan</linkto>
54// <li> <linkto class=BucketCache>BucketCache</linkto>
55// </prerequisite>
56
57// <etymology>
58// ISMBucket represents a bucket in the Incremental Storage Manager.
59// </etymology>
60
61// <synopsis>
62// The Incremental Storage Manager uses a <linkto class=BucketCache>
63// BucketCache</linkto> object to read/write/cache the buckets
64// containing the data. An <src>ISMBucket</src> object is the
65// internal representation of the contents of a bucket. <src>ISMBucket</src>
66// contains static callback functions which are called by
67// <src>BucketCache</src> when reading/writing a bucket. These callback
68// functions do the mapping of bucket data to <src>ISMBucket</src> object
69// and vice-versa.
70// <p>
71// A bucket contains the values of several rows
72// of all columns bound to this Incremental Storage Manager.
73// A bucket is split into a data part and an index part.
74// Each part has an arbitrary length but together they do not exceed
75// the fixed bucket length.
76// <p>
77// The beginning of the data part contains the values of all columns
78// bound. The remainder of the data part contains the values of
79// the rows/columns with a changed value.
80// <br>
81// The index part contains an index per column. Each index contains the
82// row number and an offset for a row with a stored value. The row numbers
83// are relative to the beginning of the bucket, so the bucket has
84// no knowledge about the absolute row numbers. In this way deletion of
85// rows is much simpler.
86// <p>
87// The contents of a bucket looks like:
88// <srcblock>
89// -------------------------------------------------------------------
90// | index offset |   data part     |   index part   |     free      |
91// -------------------------------------------------------------------
92// 0              4                 4+length(data part)
93// <--------------------------bucketsize----------------------------->
94// </srcblock>
95// The data part contains all data values belonging to the bucket.
96// The index part contains for each column the following data:
97// <srcblock>
98// -----------------------------------------------------------------------
99// | #values stored | row numbers of values | offset in data part of     |
100// | for column i   | stored for column i   | values stored for column i |
101// -----------------------------------------------------------------------
102// 0                4                       4+4*nrval
103// </srcblock>
104// Note that the row numbers in the bucket start at 0, thus are relative
105// to the beginning of the bucket. The main index kept in
106// <linkto class=ISMIndex>ISMIndex</linkto> knows the starting row of
107// each bucket. In this way bucket splitting and especially row removal
108// is much easier.
109// <p>
110// The bucket can be stored in canonical or local (i.e. native) data format.
111// When a bucket is read into memory, its data are read, converted, and
112// stored in the ISMBucket object. When flushed, the contents are
113// written. ISMBucket takes care that the values stored in its object
114// do not exceed the size of the bucket. When full, the user can call
115// a function to split it into a left and right bucket. When the new
116// value has to be written at the end, the split merely consists of
117// creating a new bucket. In any case, care is taken that a row is
118// not split. Thus a row is always entirely contained in one bucket.
119// <p>
120// Class <linkto class=ISMColumn>ISMColumn</linkto> does the actual
121// writing of data in a bucket and uses the relevant ISMBucket functions.
122
123// <motivation>
124// ISMBucket encapsulates the data of a bucket.
125// </motivation>
126
127//# <todo asof="$DATE:$">
128//# A List of bugs, limitations, extensions or planned refinements.
129//# </todo>
130
131
133{
134public:
135
136 // Create a bucket with the given parent.
137 // When <src>bucketStorage</src> is non-zero, reconstruct the
138 // object from it.
139 // It keeps the pointer to its parent (but does not own it).
140 ISMBucket (ISMBase* parent, const char* bucketStorage);
141
143
144 // Get the row-interval for given column and row.
145 // It sets the start and end of the interval to which the row belongs
146 // and the offset of its current value.
147 // It returns the index where the row number can be put in the
148 // bucket index.
149 uInt getInterval (uInt colnr, rownr_t rownr, rownr_t bucketNrrow,
150 rownr_t& start, rownr_t& end, uInt& offset) const;
151
152 // Is the bucket large enough to add a value?
153 Bool canAddData (uInt leng) const;
154
155 // Add the data to the data part.
156 // It updates the bucket index at the given index.
157 // An exception is thrown if the bucket is too small.
158 void addData (uInt colnr, rownr_t rownr, uInt index,
159 const char* data, uInt leng);
160
161 // Is the bucket large enough to replace a value?
162 Bool canReplaceData (uInt newLeng, uInt oldLeng) const;
163
164 // Replace a data item.
165 // When its length is variable (indicated by fixedLength=0), the old
166 // value will be removed and the new one appended at the end.
167 // An exception is thrown if the bucket is too small.
168 void replaceData (uInt& offset, const char* data, uInt newLeng,
169 uInt fixedLength);
170
171 // Get a pointer to the data for the given offset.
172 const char* get (uInt offset) const;
173
174 // Get the length of the data value.
175 // It is <src>fixedLength</src> when non-zero,
176 // otherwise read it from the data value.
177 uInt getLength (uInt fixedLength, const char* data) const;
178
179 // Get access to the offset of the data for given column and row.
180 // It allows to change it (used for example by replaceData).
181 uInt& getOffset (uInt colnr, rownr_t rownr);
182
183 // Get access to the index information for the given column.
184 // This is used by ISMColumn when putting the data.
185 // <group>
186 // Return the row numbers with a stored value.
188 // Return the offsets of the values stored in the data part.
189 Block<uInt>& offIndex (uInt colnr);
190 // Return the number of values stored.
191 uInt& indexUsed (uInt colnr);
192 // </group>
193
194 // Split the bucket in the middle.
195 // It returns the row number where the bucket was split and the
196 // new left and right bucket. The caller is responsible for
197 // deleting the newly created buckets.
198 // When possible a simple split is done.
199 // <br>
200 // The starting values in the right bucket may be copies of the
201 // values in the left bucket. The duplicated Block contains a switch
202 // per column indicating if the value is copied.
203 rownr_t split (ISMBucket*& left, ISMBucket*& right, Block<Bool>& duplicated,
204 rownr_t bucketStartRow, rownr_t bucketNrrow,
205 uInt colnr, rownr_t rownr, uInt lengToAdd);
206
207 // Determine whether a simple split is possible. If so, do it.
208 // This is possible if the new row is at the end of the last bucket,
209 // which will often be the case.
210 // <br>A simple split means adding a new bucket for the new row.
211 // If the old bucket already contains values for that row, those
212 // values are moved to the new bucket.
213 // <br>This fuction is only called by split, which created the
214 // left and right bucket.
216 Block<Bool>& duplicated,
217 rownr_t& splitRownr, rownr_t rownr);
218
219 // Return the index where the bucket should be split to get
220 // two parts with almost identical length.
221 uInt getSplit (uInt totLeng, const Block<uInt>& rowLeng,
222 const Block<uInt>& cumLeng);
223
224 // Remove <src>nr</src> items from data and index part by shifting
225 // to the left. The <src>rowIndex</src>, <src>offIndex</src>, and
226 // <src>nused</src> get updated. The caller is responsible for
227 // removing data when needed (e.g. <src>ISMIndColumn</src> removes
228 // the indirect arrays from its file).
230 Block<uInt>& offIndex, uInt& nused, uInt leng);
231
232 // Copy the contents of that bucket to this bucket.
233 // This is used after a split operation.
234 void copy (const ISMBucket& that);
235
236 // Callback function when BucketCache reads a bucket.
237 // It creates an ISMBucket object and converts the raw bucketStorage
238 // to that object.
239 // It returns the pointer to ISMBucket object which gets part of the cache.
240 // The object gets deleted by the deleteCallBack function.
241 static char* readCallBack (void* owner, const char* bucketStorage);
242
243 // Callback function when BucketCache writes a bucket.
244 // It converts the ISMBucket bucket object to the raw bucketStorage.
245 static void writeCallBack (void* owner, char* bucketStorage,
246 const char* bucket);
247
248 // Callback function when BucketCache adds a new bucket to the data file.
249 // This function creates an empty ISMBucket object.
250 // It returns the pointer to ISMBucket object which gets part of the cache.
251 // The object gets deleted by the deleteCallBack function.
252 static char* initCallBack (void* owner);
253
254 // Callback function when BucketCache removes a bucket from the cache.
255 // This function dletes the ISMBucket bucket object.
256 static void deleteCallBack (void*, char* bucket);
257
258 // Show the layout of the bucket.
259 void show (ostream& os) const;
260
261 // Check that there are no repeated rowIds in the bucket
262 Bool check (uInt& offendingCol, uInt& offendingIndex,
263 rownr_t& offendingRow, rownr_t& offendingPrevRow) const;
264
265private:
266 // Forbid copy constructor.
268
269 // Forbid assignment.
271
272 // Remove a data item with the given length.
273 // If the length is zero, its variable length is read first.
274 void removeData (uInt offset, uInt leng);
275
276 // Insert a data value by appending it to the end.
277 // It returns the offset of the data value.
278 uInt insertData (const char* data, uInt leng);
279
280 // Copy a data item from this bucket to the other bucket.
281 uInt copyData (ISMBucket& other, uInt colnr, rownr_t toRownr,
282 uInt fromIndex, uInt toIndex) const;
283
284 // Read the data from the storage into this bucket.
285 void read (const char* bucketStorage);
286
287 // Write the bucket into the storage.
288 void write (char* bucketStorage) const;
289
290
291 //# Declare member variables.
292 // Pointer to the parent storage manager.
294 // The size (in bytes) of an uInt and rownr_t (used in index, etc.).
297 // The size (in bytes) of the data.
299 // The size (in bytes) of the index.
301 // The row index per column; each index contains the row number
302 // of each value stored in the bucket (for that column).
304 // The offset index per column; each index contains the offset (in bytes)
305 // of each value stored in the bucket (for that column).
307 // Nr of used elements in each index; i.e. the number of stored values
308 // per column.
310 // The data space (in external (e.g. canonical) format).
311 char* data_p;
312};
313
314
315inline const char* ISMBucket::get (uInt offset) const
316{
317 return data_p + offset;
318}
320{
321 return *(rowIndex_p[colnr]);
322}
324{
325 return *(offIndex_p[colnr]);
326}
328{
329 return indexUsed_p[colnr];
330}
331
332
333
334} //# NAMESPACE CASACORE - END
335
336#endif
simple 1-D array
Definition Block.h:200
void read(const char *bucketStorage)
Read the data from the storage into this bucket.
static char * initCallBack(void *owner)
Callback function when BucketCache adds a new bucket to the data file.
uInt & indexUsed(uInt colnr)
Return the number of values stored.
Definition ISMBucket.h:327
ISMBase * stmanPtr_p
Pointer to the parent storage manager.
Definition ISMBucket.h:293
const char * get(uInt offset) const
Get a pointer to the data for the given offset.
Definition ISMBucket.h:315
ISMBucket & operator=(const ISMBucket &)
Forbid assignment.
Block< rownr_t > & rowIndex(uInt colnr)
Get access to the index information for the given column.
Definition ISMBucket.h:319
uInt uIntSize_p
The size (in bytes) of an uInt and rownr_t (used in index, etc.).
Definition ISMBucket.h:295
uInt getInterval(uInt colnr, rownr_t rownr, rownr_t bucketNrrow, rownr_t &start, rownr_t &end, uInt &offset) const
Get the row-interval for given column and row.
void removeData(uInt offset, uInt leng)
Remove a data item with the given length.
Block< uInt > indexUsed_p
Nr of used elements in each index; i.e.
Definition ISMBucket.h:309
uInt copyData(ISMBucket &other, uInt colnr, rownr_t toRownr, uInt fromIndex, uInt toIndex) const
Copy a data item from this bucket to the other bucket.
Bool canReplaceData(uInt newLeng, uInt oldLeng) const
Is the bucket large enough to replace a value?
PtrBlock< Block< uInt > * > offIndex_p
The offset index per column; each index contains the offset (in bytes) of each value stored in the bu...
Definition ISMBucket.h:306
Bool check(uInt &offendingCol, uInt &offendingIndex, rownr_t &offendingRow, rownr_t &offendingPrevRow) const
Check that there are no repeated rowIds in the bucket.
Block< uInt > & offIndex(uInt colnr)
Return the offsets of the values stored in the data part.
Definition ISMBucket.h:323
void copy(const ISMBucket &that)
Copy the contents of that bucket to this bucket.
char * data_p
The data space (in external (e.g.
Definition ISMBucket.h:311
ISMBucket(const ISMBucket &)
Forbid copy constructor.
static char * readCallBack(void *owner, const char *bucketStorage)
Callback function when BucketCache reads a bucket.
void shiftLeft(uInt index, uInt nr, Block< rownr_t > &rowIndex, Block< uInt > &offIndex, uInt &nused, uInt leng)
Remove nr items from data and index part by shifting to the left.
Bool simpleSplit(ISMBucket *left, ISMBucket *right, Block< Bool > &duplicated, rownr_t &splitRownr, rownr_t rownr)
Determine whether a simple split is possible.
uInt & getOffset(uInt colnr, rownr_t rownr)
Get access to the offset of the data for given column and row.
uInt dataLeng_p
The size (in bytes) of the data.
Definition ISMBucket.h:298
void addData(uInt colnr, rownr_t rownr, uInt index, const char *data, uInt leng)
Add the data to the data part.
void write(char *bucketStorage) const
Write the bucket into the storage.
void show(ostream &os) const
Show the layout of the bucket.
uInt insertData(const char *data, uInt leng)
Insert a data value by appending it to the end.
static void deleteCallBack(void *, char *bucket)
Callback function when BucketCache removes a bucket from the cache.
uInt getLength(uInt fixedLength, const char *data) const
Get the length of the data value.
void replaceData(uInt &offset, const char *data, uInt newLeng, uInt fixedLength)
Replace a data item.
uInt indexLeng_p
The size (in bytes) of the index.
Definition ISMBucket.h:300
static void writeCallBack(void *owner, char *bucketStorage, const char *bucket)
Callback function when BucketCache writes a bucket.
Bool canAddData(uInt leng) const
Is the bucket large enough to add a value?
PtrBlock< Block< rownr_t > * > rowIndex_p
The row index per column; each index contains the row number of each value stored in the bucket (for ...
Definition ISMBucket.h:303
rownr_t split(ISMBucket *&left, ISMBucket *&right, Block< Bool > &duplicated, rownr_t bucketStartRow, rownr_t bucketNrrow, uInt colnr, rownr_t rownr, uInt lengToAdd)
Split the bucket in the middle.
uInt getSplit(uInt totLeng, const Block< uInt > &rowLeng, const Block< uInt > &cumLeng)
Return the index where the bucket should be split to get two parts with almost identical length.
ISMBucket(ISMBase *parent, const char *bucketStorage)
Create a bucket with the given parent.
A drop-in replacement for Block<T*>.
Definition Block.h:814
this file contains all the compiler specific defines
Definition mainpage.dox:28
unsigned int uInt
Definition aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:42
uInt64 rownr_t
Define the type of a row number in a table.
Definition aipsxtype.h:46