Kùzu C++ API
Loading...
Searching...
No Matches
column.h
Go to the documentation of this file.
1#pragma once
2
3#include "catalog.h"
4#include "null_mask.h"
5#include "types.h"
6#include "compression.h"
7#include "db_file_id.h"
8#include "column_chunk_data.h"
9
10namespace kuzu {
11namespace evaluator {
12class ExpressionEvaluator;
13} // namespace evaluator
14namespace storage {
15
16struct CompressionMetadata;
17
// Callback signatures for reading values out of / writing values into a page frame. The
// cross-reference index at the end of this page lists members of these types at
// column.h:161-165.
// NOTE(review): the leading numbers are line numbers from the rendered listing; gaps in them
// (18, 27, 31) are source lines the rendering omitted.
//
// Listing line 18 is omitted; the cross-reference index attributes the signature below to
// `read_values_to_vector_func_t`.
// Parameters: frame, pageCursor, resultVector, posInVector, numValuesToRead, metadata.
19 std::function<void(uint8_t* frame, PageCursor& pageCursor, common::ValueVector* resultVector,
20 uint32_t posInVector, uint32_t numValuesToRead, const CompressionMetadata& metadata)>;
// Presumably writes the value at `posInVector` of `vector` into `frame` at `posInFrame` —
// TODO confirm against the implementations bound to this type.
21using write_values_from_vector_func_t = std::function<void(uint8_t* frame, uint16_t posInFrame,
22 common::ValueVector* vector, uint32_t posInVector, const CompressionMetadata& metadata)>;
// Raw-buffer variant: takes `data` plus `dataOffset`/`numValues` and an unnamed NullMask pointer.
// NOTE(review): whether the NullMask pointer may be null is not visible here — confirm.
23using write_values_func_t = std::function<void(uint8_t* frame, uint16_t posInFrame,
24 const uint8_t* data, common::offset_t dataOffset, common::offset_t numValues,
25 const CompressionMetadata& metadata, const common::NullMask*)>;
26
// Listing line 27 is omitted; the cross-reference index attributes the signature below to
// `read_values_to_page_func_t`.
// Parameters: frame, pageCursor, result, posInResult, numValues, metadata.
28 std::function<void(uint8_t* frame, PageCursor& pageCursor, uint8_t* result,
29 uint32_t posInResult, uint64_t numValues, const CompressionMetadata& metadata)>;
// The comment below refers to listing line 31, which is omitted; per the cross-reference index
// that line declares `batch_lookup_func_t` as an alias of `read_values_to_page_func_t`.
30// This is a special usage for the `batchLookup` interface.
32
33class NullColumn;
34class StructColumn;
35class RelTableData;
37class ShadowFile;
38class BufferManager;
// Column: base class declaring scan / lookup / write / append / checkpoint operations for one
// named, typed column. It holds non-owning pointers to a data file handle, a buffer manager and
// a shadow file, and owns an optional NullColumn.
// NOTE(review): the leading numbers are line numbers from the rendered listing; gaps in them
// mark source lines the rendering omitted, so some declarations below are missing their first
// or last line. Where an omitted declaration is named in a comment, the name comes from the
// cross-reference index at the end of this page.
39class Column {
40 friend class StringColumn;
41 friend class StructColumn;
42 friend class ListColumn;
43 friend class RelTableData;
44
45public:
46 // TODO(Guodong): Remove transaction from interface of Column. There is no need to be aware of
47 // transaction when reading/writing from/to disk pages.
// `requireNullColumn` defaults to true; presumably it controls whether `nullColumn` is created —
// TODO confirm in the constructor definition.
48 Column(std::string name, common::LogicalType dataType, BMFileHandle* dataFH,
49 BufferManager* bufferManager, ShadowFile* shadowFile, bool enableCompression,
50 bool requireNullColumn = true);
// Virtual destructor: the class is used as a polymorphic base (see the virtual members below).
51 virtual ~Column();
52
// Static flush helpers: each takes an in-memory chunk plus the target file handle. The first two
// return a new owning ColumnChunkData, the third returns only the chunk's metadata.
53 static std::unique_ptr<ColumnChunkData> flushChunkData(const ColumnChunkData& chunkData,
54 BMFileHandle& dataFH);
55 static std::unique_ptr<ColumnChunkData> flushNonNestedChunkData(
56 const ColumnChunkData& chunkData, BMFileHandle& dataFH);
57 static ColumnChunkMetadata flushData(const ColumnChunkData& chunkData, BMFileHandle& dataFH);
58
// Scan `numValuesToScan` values starting at `startOffsetInChunk` into `resultVector`.
// NOTE(review): the role of `nodeIDVector` is not visible from this declaration — confirm.
59 virtual void scan(transaction::Transaction* transaction, const ChunkState& state,
60 common::offset_t startOffsetInChunk, common::row_idx_t numValuesToScan,
61 common::ValueVector* nodeIDVector, common::ValueVector* resultVector);
// Single-value lookup at `nodeOffset`; the result goes to position `posInVector` of
// `resultVector` (presumably — TODO confirm in the definition).
62 virtual void lookupValue(transaction::Transaction* transaction, const ChunkState& state,
63 common::offset_t nodeOffset, common::ValueVector* resultVector, uint32_t posInVector);
64
65 // Scan from [startOffsetInGroup, endOffsetInGroup).
66 virtual void scan(transaction::Transaction* transaction, const ChunkState& state,
67 common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup,
68 common::ValueVector* resultVector, uint64_t offsetInVector);
69 // Scan from [startOffset, endOffset) (the parameter names used by this overload).
// Listing line 72 (the end of this declaration) is omitted; per the cross-reference index it
// reads `common::offset_t endOffset = common::INVALID_OFFSET);`.
70 virtual void scan(transaction::Transaction* transaction, const ChunkState& state,
71 ColumnChunkData* columnChunk, common::offset_t startOffset = 0,
73
// Listing line 74 is omitted; per the cross-reference index it declares the non-const
// `common::LogicalType& getDataType()`. The const accessor below returns `dataType` by reference.
75 const common::LogicalType& getDataType() const { return dataType; }
76
// Listing line 77 is omitted; presumably it declares `Column* getNullColumn() const`, which the
// cross-reference index lists without a line number — TODO confirm.
78
// Returns `name` by value, i.e. a copy on every call.
79 std::string getName() const { return name; }
80
// Raw-buffer scan overload: same offset range parameters as above, output goes to `result`.
81 virtual void scan(transaction::Transaction* transaction, const ChunkState& state,
82 common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, uint8_t* result);
83
84 // Batch write to a set of sequential pages.
85 virtual void write(ColumnChunkData& persistentChunk, ChunkState& state,
86 common::offset_t dstOffset, ColumnChunkData* data, common::offset_t srcOffset,
87 common::length_t numValues);
88
89 // Append values to the end of the node group, resizing it if necessary
// Listing line 90 (the start of this declaration) is omitted; per the cross-reference index the
// function is `common::offset_t appendValues(ColumnChunkData& persistentChunk, ChunkState& state,`
// followed by the parameters below.
91 const uint8_t* data, const common::NullMask* nullChunkData, common::offset_t numValues);
92
93 virtual void checkpointColumnChunk(ColumnCheckpointState& checkpointState);
94
// Downcast helpers returning *this as TARGET& / const TARGET&. The body lines (listing lines 97
// and 101) are omitted; presumably they use `ku_dynamic_cast`, which the cross-reference index
// lists — TODO confirm.
95 template<class TARGET>
96 TARGET& cast() {
98 }
99 template<class TARGET>
100 const TARGET& cast() const {
102 }
103
104protected:
// Overridable implementation hook with the same parameter list as the first public scan().
105 virtual void scanInternal(transaction::Transaction* transaction, const ChunkState& state,
106 common::offset_t startOffsetInChunk, common::row_idx_t numValuesToScan,
107 common::ValueVector* nodeIDVector, common::ValueVector* resultVector);
// Two non-virtual, const scan helpers; the filtered variant additionally takes a SelectionVector,
// the unfiltered one takes a starting position in the result vector (default 0).
108 void scanUnfiltered(transaction::Transaction* transaction, PageCursor& pageCursor,
109 uint64_t numValuesToScan, common::ValueVector* resultVector,
110 const ColumnChunkMetadata& chunkMeta, uint64_t startPosInVector = 0) const;
111 void scanFiltered(transaction::Transaction* transaction, PageCursor& pageCursor,
112 uint64_t numValuesToScan, const common::SelectionVector& selVector,
113 common::ValueVector* resultVector, const ColumnChunkMetadata& chunkMeta) const;
114
// Overridable implementation hook with the same parameter list as lookupValue().
115 virtual void lookupInternal(transaction::Transaction* transaction, const ChunkState& state,
116 common::offset_t nodeOffset, common::ValueVector* resultVector, uint32_t posInVector);
117
// Listing line 118 (the start of this declaration) is omitted; per the cross-reference index the
// function is `void readFromPage(transaction::Transaction* transaction,
// common::page_idx_t pageIdx,` followed by the callback parameter below.
119 const std::function<void(uint8_t*)>& func) const;
120
// Raw-buffer write; `srcOffset` defaults to 0 and `numValues` defaults to 1.
121 virtual void writeValues(ColumnChunkData& persistentChunk, ChunkState& state,
122 common::offset_t dstOffset, const uint8_t* data, const common::NullMask* nullChunkData,
123 common::offset_t srcOffset = 0, common::offset_t numValues = 1);
124
125 // Produces a page cursor for the offset relative to the given node group
126 static PageCursor getPageCursorForOffsetInGroup(common::offset_t offsetInChunk,
127 const ChunkState& state);
// Takes the cursor by value and a write callback receiving (uint8_t*, offset_t).
// NOTE(review): the meaning of the callback's two arguments is not visible here — confirm.
128 void updatePageWithCursor(PageCursor cursor,
129 const std::function<void(uint8_t*, common::offset_t)>& writeOp) const;
130
// Listing line 131 (the start of this declaration) is omitted; per the cross-reference index the
// function is `void updateStatistics(ColumnChunkMetadata& metadata, common::offset_t maxIndex,`
// followed by the optional min/max parameters below.
132 const std::optional<StorageValue>& min, const std::optional<StorageValue>& max) const;
133
134protected:
// Listing line 135 (the start of this declaration) is omitted; per the cross-reference index the
// function is `bool isMaxOffsetOutOfPagesCapacity(const ColumnChunkMetadata& metadata,` followed
// by the parameter below.
136 common::offset_t maxOffset) const;
137
138 virtual bool canCheckpointInPlace(const ChunkState& state,
139 const ColumnCheckpointState& checkpointState);
140
// Listing lines 141 and 145 (the starts of the two declarations ending at 142 and 146) are
// omitted. The cross-reference index lists `checkpointColumnChunkInPlace` and
// `checkpointColumnChunkOutOfPlace`, both virtual and taking (ChunkState& state,
// const ColumnCheckpointState& checkpointState); which one ends on which line is not shown.
142 const ColumnCheckpointState& checkpointState);
143 void checkpointNullData(const ColumnCheckpointState& checkpointState) const;
144
146 const ColumnCheckpointState& checkpointState);
147
148 // check if val is in range [start, end)
// Half-open interval: `start` is included, `end` is excluded.
149 static bool isInRange(uint64_t val, uint64_t start, uint64_t end) {
150 return val >= start && val < end;
151 }
152
153protected:
154 std::string name;
// Listing lines 155-156 are omitted; per the cross-reference index they declare
// `DBFileID dbFileID` and `common::LogicalType dataType`.
// The three raw pointers below are taken from the constructor arguments of the same names
// (presumably non-owning — TODO confirm); `nullColumn` is owned through unique_ptr.
157 BMFileHandle* dataFH;
158 BufferManager* bufferManager;
159 ShadowFile* shadowFile;
160 std::unique_ptr<NullColumn> nullColumn;
// Listing lines 161-166 are omitted; per the cross-reference index they declare readToVectorFunc,
// writeFromVectorFunc, writeFunc, readToPageFunc, batchLookupFunc and `bool enableCompression`.
167};
168
// InternalIDColumn: final Column subclass that stores one `commonTableID` and, after each of the
// overridden read paths below, calls populateCommonTableID() on the result vector.
// NOTE(review): the leading numbers are line numbers from the rendered listing, not part of the
// original source.
169class InternalIDColumn final : public Column {
170public:
// Same arguments as Column's constructor minus the data type and `requireNullColumn`; defined
// elsewhere, so the values passed to the base for those are not visible here.
171 InternalIDColumn(std::string name, BMFileHandle* dataFH, BufferManager* bufferManager,
172 ShadowFile* shadowFile, bool enableCompression);
173
// Delegates to the base-class scan with identical arguments, then post-processes `resultVector`.
// NOTE(review): declaring `scan` here hides the Column::scan overloads that are not overridden
// (the ColumnChunkData* and uint8_t* variants) for calls made through an InternalIDColumn-typed
// expression; no using-declaration is visible in this class. Calls through Column& are unaffected.
174 void scan(transaction::Transaction* transaction, const ChunkState& state,
175 common::offset_t startOffsetInChunk, common::row_idx_t numValuesToScan,
176 common::ValueVector* nodeIDVector, common::ValueVector* resultVector) override {
177 Column::scan(transaction, state, startOffsetInChunk, numValuesToScan, nodeIDVector,
178 resultVector);
179 populateCommonTableID(resultVector);
180 }
181
// Range-scan variant: base-class scan over [startOffsetInGroup, endOffsetInGroup), followed by the
// same post-processing step.
182 void scan(transaction::Transaction* transaction, const ChunkState& state,
183 common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup,
184 common::ValueVector* resultVector, uint64_t offsetInVector) override {
185 Column::scan(transaction, state, startOffsetInGroup, endOffsetInGroup, resultVector,
186 offsetInVector);
187 populateCommonTableID(resultVector);
188 }
189
// Lookup hook: base-class lookupInternal, followed by the same post-processing step. The whole
// `resultVector` is passed to populateCommonTableID(), not just `posInVector`.
190 void lookupInternal(transaction::Transaction* transaction, const ChunkState& state,
191 common::offset_t nodeOffset, common::ValueVector* resultVector,
192 uint32_t posInVector) override {
193 Column::lookupInternal(transaction, state, nodeOffset, resultVector, posInVector);
194 populateCommonTableID(resultVector);
195 }
196
// Plain accessor and mutator for `commonTableID`.
197 common::table_id_t getCommonTableID() const { return commonTableID; }
198 // TODO(Guodong): This function should be removed through rewriting INTERNAL_ID as STRUCT.
199 void setCommonTableID(common::table_id_t tableID) { commonTableID = tableID; }
200
201private:
// Defined elsewhere; presumably stamps `commonTableID` onto the internal IDs held by
// `resultVector` (the vector is taken as pointer-to-const) — TODO confirm in the definition.
202 void populateCommonTableID(const common::ValueVector* resultVector) const;
203
204private:
// NOTE(review): no in-class initializer is visible here; the initial value depends on the
// constructor definition or a later setCommonTableID() call — confirm it is set before any read.
205 common::table_id_t commonTableID;
206};
207
// NOTE(review): the line that opens this scope (listing line 208) is omitted from the rendering,
// so the enclosing type's name is not visible here; the `212};` line below closes that scope. The
// leading numbers are line numbers from the rendered listing.
//
// Static factory: returns an owning pointer to a Column built from the given name, data type,
// file handle, buffer manager, shadow file and compression flag. Which concrete Column subclass
// is created is decided in the definition (not shown) — presumably based on `dataType`.
209 static std::unique_ptr<Column> createColumn(std::string name, common::LogicalType dataType,
210 BMFileHandle* dataFH, BufferManager* bufferManager, ShadowFile* shadowFile,
211 bool enableCompression);
212};
213
214} // namespace storage
215} // namespace kuzu
Definition types.h:201
Definition null_mask.h:70
Definition sel_vector.h:12
Definition value_vector.h:20
Definition column_chunk_data.h:85
Definition column.h:39
ShadowFile * shadowFile
Definition column.h:159
static PageCursor getPageCursorForOffsetInGroup(common::offset_t offsetInChunk, const ChunkState &state)
void scanUnfiltered(transaction::Transaction *transaction, PageCursor &pageCursor, uint64_t numValuesToScan, common::ValueVector *resultVector, const ColumnChunkMetadata &chunkMeta, uint64_t startPosInVector=0) const
virtual void checkpointColumnChunkOutOfPlace(ChunkState &state, const ColumnCheckpointState &checkpointState)
void updatePageWithCursor(PageCursor cursor, const std::function< void(uint8_t *, common::offset_t)> &writeOp) const
write_values_func_t writeFunc
Definition column.h:163
read_values_to_page_func_t readToPageFunc
Definition column.h:164
virtual void lookupValue(transaction::Transaction *transaction, const ChunkState &state, common::offset_t nodeOffset, common::ValueVector *resultVector, uint32_t posInVector)
static bool isInRange(uint64_t val, uint64_t start, uint64_t end)
Definition column.h:149
friend class RelTableData
Definition column.h:43
bool isMaxOffsetOutOfPagesCapacity(const ColumnChunkMetadata &metadata, common::offset_t maxOffset) const
virtual void scan(transaction::Transaction *transaction, const ChunkState &state, common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, common::ValueVector *resultVector, uint64_t offsetInVector)
const TARGET & cast() const
Definition column.h:100
static std::unique_ptr< ColumnChunkData > flushChunkData(const ColumnChunkData &chunkData, BMFileHandle &dataFH)
virtual void writeValues(ColumnChunkData &persistentChunk, ChunkState &state, common::offset_t dstOffset, const uint8_t *data, const common::NullMask *nullChunkData, common::offset_t srcOffset=0, common::offset_t numValues=1)
std::string name
Definition column.h:154
friend class StringColumn
Definition column.h:40
bool enableCompression
Definition column.h:166
common::LogicalType dataType
Definition column.h:156
virtual void checkpointColumnChunkInPlace(ChunkState &state, const ColumnCheckpointState &checkpointState)
read_values_to_vector_func_t readToVectorFunc
Definition column.h:161
write_values_from_vector_func_t writeFromVectorFunc
Definition column.h:162
void updateStatistics(ColumnChunkMetadata &metadata, common::offset_t maxIndex, const std::optional< StorageValue > &min, const std::optional< StorageValue > &max) const
virtual void scan(transaction::Transaction *transaction, const ChunkState &state, common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, uint8_t *result)
virtual void scan(transaction::Transaction *transaction, const ChunkState &state, ColumnChunkData *columnChunk, common::offset_t startOffset=0, common::offset_t endOffset=common::INVALID_OFFSET)
std::string getName() const
Definition column.h:79
virtual void checkpointColumnChunk(ColumnCheckpointState &checkpointState)
void checkpointNullData(const ColumnCheckpointState &checkpointState) const
virtual void write(ColumnChunkData &persistentChunk, ChunkState &state, common::offset_t dstOffset, ColumnChunkData *data, common::offset_t srcOffset, common::length_t numValues)
virtual void scanInternal(transaction::Transaction *transaction, const ChunkState &state, common::offset_t startOffsetInChunk, common::row_idx_t numValuesToScan, common::ValueVector *nodeIDVector, common::ValueVector *resultVector)
static ColumnChunkMetadata flushData(const ColumnChunkData &chunkData, BMFileHandle &dataFH)
Column(std::string name, common::LogicalType dataType, BMFileHandle *dataFH, BufferManager *bufferManager, ShadowFile *shadowFile, bool enableCompression, bool requireNullColumn=true)
void readFromPage(transaction::Transaction *transaction, common::page_idx_t pageIdx, const std::function< void(uint8_t *)> &func) const
void scanFiltered(transaction::Transaction *transaction, PageCursor &pageCursor, uint64_t numValuesToScan, const common::SelectionVector &selVector, common::ValueVector *resultVector, const ColumnChunkMetadata &chunkMeta) const
static std::unique_ptr< ColumnChunkData > flushNonNestedChunkData(const ColumnChunkData &chunkData, BMFileHandle &dataFH)
TARGET & cast()
Definition column.h:96
Column * getNullColumn() const
virtual void scan(transaction::Transaction *transaction, const ChunkState &state, common::offset_t startOffsetInChunk, common::row_idx_t numValuesToScan, common::ValueVector *nodeIDVector, common::ValueVector *resultVector)
const common::LogicalType & getDataType() const
Definition column.h:75
virtual bool canCheckpointInPlace(const ChunkState &state, const ColumnCheckpointState &checkpointState)
DBFileID dbFileID
Definition column.h:155
std::unique_ptr< NullColumn > nullColumn
Definition column.h:160
friend class ListColumn
Definition column.h:42
BMFileHandle * dataFH
Definition column.h:157
friend class StructColumn
Definition column.h:41
BufferManager * bufferManager
Definition column.h:158
virtual void lookupInternal(transaction::Transaction *transaction, const ChunkState &state, common::offset_t nodeOffset, common::ValueVector *resultVector, uint32_t posInVector)
common::offset_t appendValues(ColumnChunkData &persistentChunk, ChunkState &state, const uint8_t *data, const common::NullMask *nullChunkData, common::offset_t numValues)
common::LogicalType & getDataType()
Definition column.h:74
batch_lookup_func_t batchLookupFunc
Definition column.h:165
Definition column.h:169
void setCommonTableID(common::table_id_t tableID)
Definition column.h:199
void lookupInternal(transaction::Transaction *transaction, const ChunkState &state, common::offset_t nodeOffset, common::ValueVector *resultVector, uint32_t posInVector) override
Definition column.h:190
void scan(transaction::Transaction *transaction, const ChunkState &state, common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, common::ValueVector *resultVector, uint64_t offsetInVector) override
Definition column.h:182
InternalIDColumn(std::string name, BMFileHandle *dataFH, BufferManager *bufferManager, ShadowFile *shadowFile, bool enableCompression)
void scan(transaction::Transaction *transaction, const ChunkState &state, common::offset_t startOffsetInChunk, common::row_idx_t numValuesToScan, common::ValueVector *nodeIDVector, common::ValueVector *resultVector) override
Definition column.h:174
common::table_id_t getCommonTableID() const
Definition column.h:197
Definition transaction.h:28
uint64_t length_t
Definition types.h:53
constexpr offset_t INVALID_OFFSET
Definition internal_id_t.h:23
uint32_t page_idx_t
Definition types.h:26
uint64_t table_id_t
Definition internal_id_t.h:14
TO ku_dynamic_cast(FROM old)
Definition cast.h:11
uint64_t offset_t
Definition internal_id_t.h:22
uint64_t row_idx_t
Definition types.h:46
std::function< void(uint8_t *frame, uint16_t posInFrame, const uint8_t *data, common::offset_t dataOffset, common::offset_t numValues, const CompressionMetadata &metadata, const common::NullMask *)> write_values_func_t
Definition column.h:23
std::function< void(uint8_t *frame, uint16_t posInFrame, common::ValueVector *vector, uint32_t posInVector, const CompressionMetadata &metadata)> write_values_from_vector_func_t
Definition column.h:21
std::function< void(uint8_t *frame, PageCursor &pageCursor, common::ValueVector *resultVector, uint32_t posInVector, uint32_t numValuesToRead, const CompressionMetadata &metadata)> read_values_to_vector_func_t
Definition column.h:18
read_values_to_page_func_t batch_lookup_func_t
Definition column.h:31
std::function< void(uint8_t *frame, PageCursor &pageCursor, uint8_t *result, uint32_t posInResult, uint64_t numValues, const CompressionMetadata &metadata)> read_values_to_page_func_t
Definition column.h:27
Definition alter_type.h:5
Definition column_chunk_data.h:45
Definition column_chunk.h:19
Definition column_chunk_data.h:29
Definition column.h:208
static std::unique_ptr< Column > createColumn(std::string name, common::LogicalType dataType, BMFileHandle *dataFH, BufferManager *bufferManager, ShadowFile *shadowFile, bool enableCompression)
Definition compression.h:111
Definition db_file_id.h:15