Kùzu C++ API
Loading...
Searching...
No Matches
value_vector.h
Go to the documentation of this file.
1#pragma once
2
3#include <utility>
4
5#include "assert.h"
6#include "cast.h"
7#include "copy_constructors.h"
8#include "data_chunk_state.h"
9#include "null_mask.h"
10#include "ku_string.h"
11#include "auxiliary_buffer.h"
12
13namespace kuzu {
14namespace common {
15
16class Value;
17
// Classes granted access to this class's non-public members. The helper classes later in this
// header (ListVector, StructVector, StringVector) rely on it to reach `auxiliaryBuffer`.
21 friend class ListVector;
22 friend class ListAuxiliaryBuffer;
23 friend class StructVector;
24 friend class StringVector;
25 friend class ArrowColumnVector;
26
27public:
// Constructs a vector holding values of `dataType`. `memoryManager` is optional (defaults to
// nullptr). Defined out of line.
28 explicit ValueVector(LogicalType dataType, storage::MemoryManager* memoryManager = nullptr);
// Convenience overload taking only a type id: wraps the id in a LogicalType and delegates to the
// constructor above. Asserts that the id is not LIST.
// NOTE(review): presumably because a bare LIST id carries no child type - confirm.
29 explicit ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager* memoryManager = nullptr)
30 : ValueVector(LogicalType(dataTypeID), memoryManager) {
31 KU_ASSERT(dataTypeID != LogicalTypeID::LIST);
32 }
33
// Defaulted destructor; the smart-pointer members declared in this class release what they hold.
35 ~ValueVector() = default;
36
// Associates a shared DataChunkState with this vector. Defined out of line; presumably assigns
// the public `state` member - confirm against the implementation.
37 void setState(const std::shared_ptr<DataChunkState>& state_);
38
// Bulk null-mask updates, forwarded directly to `nullMask`:
// setAllNull() calls NullMask::setAllNull, setAllNonNull() calls NullMask::setAllNonNull.
39 void setAllNull() { nullMask.setAllNull(); }
40 void setAllNonNull() { nullMask.setAllNonNull(); }
41 // On return true, there are no nulls. On return false, there may or may not be nulls.
// In other words, only a true result is conclusive. Forwards to NullMask::hasNoNullsGuarantee.
42 bool hasNoNullsGuarantee() const { return nullMask.hasNoNullsGuarantee(); }
// Forwards (startPos, len, value) to NullMask::setNullFromRange; presumably sets the null flag to
// `value` for `len` consecutive positions beginning at `startPos`.
43 void setNullRange(uint32_t startPos, uint32_t len, bool value) {
44 nullMask.setNullFromRange(startPos, len, value);
45 }
// Read-only access to this vector's null mask.
46 const NullMask& getNullMask() const { return nullMask; }
// Sets the null flag of the entry at `pos`. Defined out of line.
47 void setNull(uint32_t pos, bool isNull);
// Returns whatever NullMask::isNull reports for `pos`.
// NOTE(review): the return type is uint8_t rather than bool - confirm this is intentional.
48 uint8_t isNull(uint32_t pos) const { return nullMask.isNull(pos); }
50 state->getSelVectorUnsafe().setSelSize(1);
51 setNull(state->getSelVector()[0], true);
52 }
53
// Defined out of line. Presumably copies `numBitsToCopy` null bits from `srcNullEntries`
// (starting at bit `srcOffset`) into this vector's null mask at `dstOffset`, flipping each bit
// when `invert` is true. NOTE(review): the meaning of the bool result is not visible in this
// header - confirm against the implementation.
54 bool setNullFromBits(const uint64_t* srcNullEntries, uint64_t srcOffset, uint64_t dstOffset,
55 uint64_t numBitsToCopy, bool invert = false);
56
// Returns `numBytesPerValue`. Callers in this header (e.g. ListVector::getListValues) use it as
// the per-element stride when computing byte offsets from getData().
57 uint32_t getNumBytesPerValue() const { return numBytesPerValue; }
58
59 // TODO(Guodong): Rename this to getValueRef
// Returns a reference to the element at index `pos`, viewing the raw value buffer as an array
// of T. No bounds or type checking is performed here: the caller must choose a T that matches
// the data actually stored in the buffer. The named cast makes the type punning explicit.
60 template<typename T>
61 const T& getValue(uint32_t pos) const {
62 return reinterpret_cast<T*>(valueBuffer.get())[pos];
63 }
// Mutable overload of the same accessor.
64 template<typename T>
65 T& getValue(uint32_t pos) {
66 return reinterpret_cast<T*>(valueBuffer.get())[pos];
67 }
// Stores `val` at index `pos`. Only declared here; the definition is not part of this header.
68 template<typename T>
69 void setValue(uint32_t pos, T val);
70 // copyFromRowData assumes rowData is non-NULL.
71 void copyFromRowData(uint32_t pos, const uint8_t* rowData);
72 // copyToRowData assumes srcVectorData is non-NULL.
// NOTE(review): copyToRowData has no parameter named srcVectorData; the comment above was
// probably copied from copyFromVectorData and likely means rowData - confirm.
73 void copyToRowData(uint32_t pos, uint8_t* rowData,
74 InMemOverflowBuffer* rowOverflowBuffer) const;
75 // copyFromVectorData assumes srcVectorData is non-NULL.
// Pointer-based overload: both the destination and the source are addressed by raw pointers.
76 void copyFromVectorData(uint8_t* dstData, const ValueVector* srcVector,
77 const uint8_t* srcVectorData);
// Position-based overload: copies from `srcPos` of `srcVector` into `dstPos` of this vector.
78 void copyFromVectorData(uint64_t dstPos, const ValueVector* srcVector, uint64_t srcPos);
// Writes the contents of `value` into the entry at `pos`. Defined out of line.
79 void copyFromValue(uint64_t pos, const Value& value);
80
// Returns the entry at `pos` as a heap-allocated Value owned by the caller (unique_ptr).
// Defined out of line.
81 std::unique_ptr<Value> getAsValue(uint64_t pos) const;
82
// Raw pointer to the start of the value buffer. The pointer is non-const even though the
// method is const, so callers can write through it.
83 uint8_t* getData() const { return valueBuffer.get(); }
84
// Reads the entry at `pos` as a nodeID_t and returns its `offset` field.
// Asserts that this vector's logical type is INTERNAL_ID.
85 offset_t readNodeOffset(uint32_t pos) const {
86 KU_ASSERT(dataType.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID);
87 return getValue<nodeID_t>(pos).offset;
88 }
89
91
92 // If there are still non-null values after discarding, return true. Otherwise, return false.
93 // For an unflat vector, its selection vector is also updated to the resultSelVector.
94 static bool discardNull(ValueVector& vector);
95
// Writes this vector to `ser`. Defined out of line.
96 void serialize(Serializer& ser) const;
// Counterpart of serialize(): builds a new vector from `deSer`. The memory manager and the
// chunk state are supplied by the caller rather than read from the stream.
97 static std::unique_ptr<ValueVector> deSerialize(Deserializer& deSer, storage::MemoryManager* mm,
98 std::shared_ptr<DataChunkState> dataChunkState);
99
100private:
// Private helpers, defined out of line. Presumably used during construction to compute
// `numBytesPerValue` and to allocate `valueBuffer` - confirm against the implementation.
101 uint32_t getDataTypeSize(const LogicalType& type);
102 void initializeValueBuffer();
103
104public:
// Shared chunk state. Code in this header reads the selection vector through it
// (state->getSelVector()), e.g. UnionVector::setTagField.
106 std::shared_ptr<DataChunkState> state;
107
108private:
// Raw byte storage for the values; exposed via getData() and reinterpreted by getValue<T>().
109 std::unique_ptr<uint8_t[]> valueBuffer;
// Per-entry null flags; all null-related member functions forward to it.
110 NullMask nullMask;
// Value returned by getNumBytesPerValue(); used as the per-element byte stride.
111 uint32_t numBytesPerValue;
// Type-specific side storage. The helper classes in this header downcast it to
// StringAuxiliaryBuffer, ListAuxiliaryBuffer or StructAuxiliaryBuffer.
112 std::unique_ptr<AuxiliaryBuffer> auxiliaryBuffer;
113};
114
116public:
118 KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
119 return ku_dynamic_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
120 ->getOverflowBuffer();
121 }
122
// Position-based overloads: store a string into the entry at `vectorPos` of `vector`.
// The source may be a ku_string_t, a (pointer, length) pair, or a std::string.
123 static void addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr);
124 static void addString(ValueVector* vector, uint32_t vectorPos, const char* srcStr,
125 uint64_t length);
126 static void addString(ValueVector* vector, uint32_t vectorPos, const std::string& srcStr);
127 // Add empty string with space reserved for the provided size
128 // Returned value can be modified to set the string contents
129 static ku_string_t& reserveString(ValueVector* vector, uint32_t vectorPos, uint64_t length);
// Destination-based variants: operate on a caller-supplied ku_string_t `dstStr` instead of a
// position in the vector. NOTE(review): presumably `vector` still provides the backing
// storage for long strings - confirm against the implementation.
130 static void reserveString(ValueVector* vector, ku_string_t& dstStr, uint64_t length);
131 static void addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr);
132 static void addString(ValueVector* vector, ku_string_t& dstStr, const char* srcStr,
133 uint64_t length);
134 static void addString(kuzu::common::ValueVector* vector, ku_string_t& dstStr,
135 const std::string& srcStr);
// String-specific counterpart of ValueVector::copyToRowData. Defined out of line.
136 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
137 InMemOverflowBuffer* rowOverflowBuffer);
138};
139
// Stores `length` bytes starting at `data` into the entry at `pos` by delegating to
// StringVector::addString (blobs share the string storage path).
// NOTE(review): this overload takes a uint32_t length while the one below takes uint64_t.
141 static void addBlob(ValueVector* vector, uint32_t pos, const char* data, uint32_t length) {
142 StringVector::addString(vector, pos, data, length);
143 }
// Same as above for unsigned bytes; the pointer is reinterpreted as const char* before
// delegating.
144 static void addBlob(ValueVector* vector, uint32_t pos, const uint8_t* data, uint64_t length) {
145 StringVector::addString(vector, pos, reinterpret_cast<const char*>(data), length);
146 }
147};
148
149// ListVector is used for both LIST and ARRAY physical type
151public:
// Read-only view of the vector's auxiliary buffer as a ListAuxiliaryBuffer (via constCast).
// Does not itself check the vector's type; callers in this class assert validateType first.
152 static const ListAuxiliaryBuffer& getAuxBuffer(const ValueVector& vector) {
153 return vector.auxiliaryBuffer->constCast<ListAuxiliaryBuffer>();
154 }
156 return vector.auxiliaryBuffer->cast<ListAuxiliaryBuffer>();
157 }
158 // If you call setDataVector during initialization, there must be a follow-up
159 // copyListEntryAndBufferMetaData at runtime.
160 // TODO(Xiyang): try to merge setDataVector & copyListEntryAndBufferMetaData
// Installs `dataVector` as the list's child data vector. Note that `vector` is taken as a
// pointer-to-const, yet its auxiliary buffer is modified through getAuxBufferUnsafe.
161 static void setDataVector(const ValueVector* vector, std::shared_ptr<ValueVector> dataVector) {
162 KU_ASSERT(validateType(*vector));
163 auto& listBuffer = getAuxBufferUnsafe(*vector);
164 listBuffer.setDataVector(std::move(dataVector));
165 }
// Defined out of line. Per its name, copies list entries and auxiliary-buffer metadata from
// `other` into `vector`; intended as the runtime follow-up to setDataVector.
166 static void copyListEntryAndBufferMetaData(ValueVector& vector, const ValueVector& other);
// Non-owning pointer to the child data vector that stores the list elements.
// Asserts that `vector` has LIST or ARRAY physical type.
167 static ValueVector* getDataVector(const ValueVector* vector) {
168 KU_ASSERT(validateType(*vector));
169 return getAuxBuffer(*vector).getDataVector();
170 }
// Same child vector as getDataVector, but returned as a shared_ptr so the caller shares
// ownership. Asserts that `vector` has LIST or ARRAY physical type.
171 static std::shared_ptr<ValueVector> getSharedDataVector(const ValueVector* vector) {
172 KU_ASSERT(validateType(*vector));
173 return getAuxBuffer(*vector).getSharedDataVector();
174 }
// Returns the size reported by the list auxiliary buffer (ListAuxiliaryBuffer::getSize).
// Asserts that `vector` has LIST or ARRAY physical type.
175 static uint64_t getDataVectorSize(const ValueVector* vector) {
176 KU_ASSERT(validateType(*vector));
177 return getAuxBuffer(*vector).getSize();
178 }
// Byte pointer to the first element of `listEntry` inside the child data vector:
// data base address + listEntry.offset * bytes-per-value of the data vector.
179 static uint8_t* getListValues(const ValueVector* vector, const list_entry_t& listEntry) {
180 KU_ASSERT(validateType(*vector));
181 auto dataVector = getDataVector(vector);
182 return dataVector->getData() + dataVector->getNumBytesPerValue() * listEntry.offset;
183 }
// Byte pointer to the element `elementOffsetInList` positions past the start of `listEntry`:
// getListValues(...) advanced by elementOffsetInList * bytes-per-value. No bounds check is
// made against the list's length.
184 static uint8_t* getListValuesWithOffset(const ValueVector* vector,
185 const list_entry_t& listEntry, offset_t elementOffsetInList) {
186 KU_ASSERT(validateType(*vector));
187 return getListValues(vector, listEntry) +
188 elementOffsetInList * getDataVector(vector)->getNumBytesPerValue();
189 }
// Asks the list auxiliary buffer for a new list of `listSize` elements and returns the
// resulting list_entry_t. Asserts that `vector` has LIST or ARRAY physical type.
190 static list_entry_t addList(ValueVector* vector, uint64_t listSize) {
191 KU_ASSERT(validateType(*vector));
192 return getAuxBufferUnsafe(*vector).addList(listSize);
193 }
// Forwards `numValues` to ListAuxiliaryBuffer::resize for this vector's auxiliary buffer.
// Asserts that `vector` has LIST or ARRAY physical type.
194 static void resizeDataVector(ValueVector* vector, uint64_t numValues) {
195 KU_ASSERT(validateType(*vector));
196 getAuxBufferUnsafe(*vector).resize(numValues);
197 }
198
// List-specific counterparts of the ValueVector row/vector copy routines. Defined out of line.
199 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
200 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
201 InMemOverflowBuffer* rowOverflowBuffer);
202 static void copyFromVectorData(ValueVector* dstVector, uint8_t* dstData,
203 const ValueVector* srcVector, const uint8_t* srcData);
// Per their names: append `numValuesToAppend` values from `srcDataVector` to the data vector of
// `dstVector`, and restrict a data vector to [offset, offset + numValues).
// NOTE(review): exact semantics are not visible in this header - confirm.
204 static void appendDataVector(ValueVector* dstVector, ValueVector* srcDataVector,
205 uint64_t numValuesToAppend);
206 static void sliceDataVector(ValueVector* vectorToSlice, uint64_t offset, uint64_t numValues);
207
208private:
// Returns true exactly when the vector's physical type is LIST or ARRAY, i.e. the types this
// helper class is meant for. Used inside KU_ASSERT by the accessors of this class.
209 static bool validateType(const ValueVector& vector) {
210 switch (vector.dataType.getPhysicalType()) {
211 case PhysicalTypeID::LIST:
212 case PhysicalTypeID::ARRAY:
213 return true;
214 default:
215 return false;
216 }
217 }
218};
219
221public:
// Returns a const reference to all child field vectors of a struct vector, obtained by
// downcasting the auxiliary buffer to StructAuxiliaryBuffer. No type assertion is made here.
222 static const std::vector<std::shared_ptr<ValueVector>>& getFieldVectors(
223 const ValueVector* vector) {
224 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
225 ->getFieldVectors();
226 }
227
// Returns the child vector for field index `idx` as a shared_ptr (returned by value, so the
// caller receives its own shared owner).
228 static std::shared_ptr<ValueVector> getFieldVector(const ValueVector* vector,
229 struct_field_idx_t idx) {
230 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
231 ->getFieldVectorShared(idx);
232 }
233
// Looks up the field index for `fieldName` via StructType::getFieldIdx on the vector's data
// type, then returns a non-owning pointer to that child vector.
// NOTE(review): behavior for an unknown field name depends on getFieldIdx - confirm.
234 static ValueVector* getFieldVectorRaw(const ValueVector& vector, const std::string& fieldName) {
235 auto idx = StructType::getFieldIdx(vector.dataType, fieldName);
236 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector.auxiliaryBuffer.get())
237 ->getFieldVectorPtr(idx);
238 }
239
241 std::shared_ptr<ValueVector> vectorToReference) {
242 ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
243 ->referenceChildVector(idx, std::move(vectorToReference));
244 }
245
// Struct-specific counterparts of the ValueVector row/vector copy routines. Defined out of line.
246 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
247 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
248 InMemOverflowBuffer* rowOverflowBuffer);
// NOTE(review): `dstData` is pointer-to-const here, whereas the ValueVector and ListVector
// versions of copyFromVectorData take a mutable uint8_t* destination - confirm intent.
249 static void copyFromVectorData(ValueVector* dstVector, const uint8_t* dstData,
250 const ValueVector* srcVector, const uint8_t* srcData);
251};
252
254public:
259
// Returns a non-owning pointer to the child vector of union member `fieldIdx`. The union index
// is first translated with UnionType::getInternalFieldIdx and then resolved through
// StructVector::getFieldVector. The temporary shared_ptr is dropped on return, so the pointer
// stays valid only while the union vector itself keeps the child alive.
260 static inline ValueVector* getValVector(const ValueVector* vector, union_field_idx_t fieldIdx) {
262 return StructVector::getFieldVector(vector, UnionType::getInternalFieldIdx(fieldIdx)).get();
263 }
264
265 static inline void referenceVector(ValueVector* vector, union_field_idx_t fieldIdx,
266 std::shared_ptr<ValueVector> vectorToReference) {
268 std::move(vectorToReference));
269 }
270
// Writes `tag` (as struct_field_idx_t) into `vector` at every position currently listed in its
// state's selection vector, i.e. positions selVector[0 .. selSize).
271 static inline void setTagField(ValueVector* vector, union_field_idx_t tag) {
273 for (auto i = 0u; i < vector->state->getSelVector().getSelSize(); i++) {
274 vector->setValue<struct_field_idx_t>(vector->state->getSelVector()[i], tag);
275 }
276 }
277};
278
280public:
// Returns a non-owning pointer to the key vector of a map: field 0 of the struct vector that
// serves as the list's data vector.
281 static inline ValueVector* getKeyVector(const ValueVector* vector) {
282 return StructVector::getFieldVector(ListVector::getDataVector(vector), 0 /* keyVectorPos */)
283 .get();
284 }
285
// Returns a non-owning pointer to the value vector of a map: field 1 of the struct vector that
// serves as the list's data vector.
286 static inline ValueVector* getValueVector(const ValueVector* vector) {
287 return StructVector::getFieldVector(ListVector::getDataVector(vector), 1 /* valVectorPos */)
288 .get();
289 }
290
// Byte pointer to the first key of `listEntry`:
// key vector base address + listEntry.offset * bytes-per-value of the key vector.
291 static inline uint8_t* getMapKeys(const ValueVector* vector, const list_entry_t& listEntry) {
292 auto keyVector = getKeyVector(vector);
293 return keyVector->getData() + keyVector->getNumBytesPerValue() * listEntry.offset;
294 }
295
// Byte pointer to the first value of `listEntry`:
// value vector base address + listEntry.offset * bytes-per-value of the value vector.
296 static inline uint8_t* getMapValues(const ValueVector* vector, const list_entry_t& listEntry) {
297 auto valueVector = getValueVector(vector);
298 return valueVector->getData() + valueVector->getNumBytesPerValue() * listEntry.offset;
299 }
300};
301
302} // namespace common
303} // namespace kuzu
#define KUZU_API
Definition api.h:25
#define KU_ASSERT(condition)
Definition assert.h:19
Definition in_mem_overflow_buffer.h:30
Definition auxiliary_buffer.h:79
Definition value_vector.h:150
static void setDataVector(const ValueVector *vector, std::shared_ptr< ValueVector > dataVector)
Definition value_vector.h:161
static void sliceDataVector(ValueVector *vectorToSlice, uint64_t offset, uint64_t numValues)
static void copyFromVectorData(ValueVector *dstVector, uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void appendDataVector(ValueVector *dstVector, ValueVector *srcDataVector, uint64_t numValuesToAppend)
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static list_entry_t addList(ValueVector *vector, uint64_t listSize)
Definition value_vector.h:190
static const ListAuxiliaryBuffer & getAuxBuffer(const ValueVector &vector)
Definition value_vector.h:152
static void resizeDataVector(ValueVector *vector, uint64_t numValues)
Definition value_vector.h:194
static void copyListEntryAndBufferMetaData(ValueVector &vector, const ValueVector &other)
static ListAuxiliaryBuffer & getAuxBufferUnsafe(const ValueVector &vector)
Definition value_vector.h:155
static uint8_t * getListValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:179
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static uint64_t getDataVectorSize(const ValueVector *vector)
Definition value_vector.h:175
static ValueVector * getDataVector(const ValueVector *vector)
Definition value_vector.h:167
static uint8_t * getListValuesWithOffset(const ValueVector *vector, const list_entry_t &listEntry, offset_t elementOffsetInList)
Definition value_vector.h:184
static std::shared_ptr< ValueVector > getSharedDataVector(const ValueVector *vector)
Definition value_vector.h:171
Definition types.h:246
KUZU_API LogicalTypeID getLogicalTypeID() const
Definition types.h:269
KUZU_API PhysicalTypeID getPhysicalType() const
Definition types.h:273
Definition value_vector.h:279
static ValueVector * getValueVector(const ValueVector *vector)
Definition value_vector.h:286
static uint8_t * getMapKeys(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:291
static uint8_t * getMapValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:296
static ValueVector * getKeyVector(const ValueVector *vector)
Definition value_vector.h:281
Definition null_mask.h:70
Definition value_vector.h:115
static void reserveString(ValueVector *vector, ku_string_t &dstStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, ku_string_t &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, uint32_t vectorPos, const std::string &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, ku_string_t &srcStr)
static ku_string_t & reserveString(ValueVector *vector, uint32_t vectorPos, uint64_t length)
static InMemOverflowBuffer * getInMemOverflowBuffer(ValueVector *vector)
Definition value_vector.h:117
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static void addString(kuzu::common::ValueVector *vector, ku_string_t &dstStr, const std::string &srcStr)
Definition value_vector.h:220
static const std::vector< std::shared_ptr< ValueVector > > & getFieldVectors(const ValueVector *vector)
Definition value_vector.h:222
static void copyFromVectorData(ValueVector *dstVector, const uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static void referenceVector(ValueVector *vector, struct_field_idx_t idx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:240
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static ValueVector * getFieldVectorRaw(const ValueVector &vector, const std::string &fieldName)
Definition value_vector.h:234
static std::shared_ptr< ValueVector > getFieldVector(const ValueVector *vector, struct_field_idx_t idx)
Definition value_vector.h:228
Definition value_vector.h:253
static void setTagField(ValueVector *vector, union_field_idx_t tag)
Definition value_vector.h:271
static void referenceVector(ValueVector *vector, union_field_idx_t fieldIdx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:265
static ValueVector * getValVector(const ValueVector *vector, union_field_idx_t fieldIdx)
Definition value_vector.h:260
static ValueVector * getTagVector(const ValueVector *vector)
Definition value_vector.h:255
Definition value.h:26
Definition value_vector.h:20
static bool discardNull(ValueVector &vector)
uint8_t * getData() const
Definition value_vector.h:83
void copyFromRowData(uint32_t pos, const uint8_t *rowData)
void copyToRowData(uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer) const
void setState(const std::shared_ptr< DataChunkState > &state_)
void copyFromValue(uint64_t pos, const Value &value)
std::unique_ptr< Value > getAsValue(uint64_t pos) const
void setAllNonNull()
Definition value_vector.h:40
uint8_t isNull(uint32_t pos) const
Definition value_vector.h:48
void serialize(Serializer &ser) const
ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager *memoryManager=nullptr)
Definition value_vector.h:29
const T & getValue(uint32_t pos) const
Definition value_vector.h:61
LogicalType dataType
Definition value_vector.h:105
static std::unique_ptr< ValueVector > deSerialize(Deserializer &deSer, storage::MemoryManager *mm, std::shared_ptr< DataChunkState > dataChunkState)
void copyFromVectorData(uint64_t dstPos, const ValueVector *srcVector, uint64_t srcPos)
DELETE_COPY_AND_MOVE(ValueVector)
const NullMask & getNullMask() const
Definition value_vector.h:46
bool hasNoNullsGuarantee() const
Definition value_vector.h:42
T & getValue(uint32_t pos)
Definition value_vector.h:65
void setAllNull()
Definition value_vector.h:39
uint32_t getNumBytesPerValue() const
Definition value_vector.h:57
void setAsSingleNullEntry()
Definition value_vector.h:49
void setValue(uint32_t pos, T val)
void copyFromVectorData(uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcVectorData)
offset_t readNodeOffset(uint32_t pos) const
Definition value_vector.h:85
bool setNullFromBits(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
ValueVector(LogicalType dataType, storage::MemoryManager *memoryManager=nullptr)
void setNullRange(uint32_t startPos, uint32_t len, bool value)
Definition value_vector.h:43
void setNull(uint32_t pos, bool isNull)
std::shared_ptr< DataChunkState > state
Definition value_vector.h:106
struct_field_idx_t union_field_idx_t
Definition types.h:46
LogicalTypeID
Definition types.h:167
uint64_t offset_t
Definition types.h:74
uint8_t struct_field_idx_t
Definition types.h:45
TO ku_dynamic_cast(FROM *old)
Definition cast.h:11
Definition array_utils.h:7
Definition value_vector.h:140
static void addBlob(ValueVector *vector, uint32_t pos, const uint8_t *data, uint64_t length)
Definition value_vector.h:144
static void addBlob(ValueVector *vector, uint32_t pos, const char *data, uint32_t length)
Definition value_vector.h:141
static struct_field_idx_t getFieldIdx(const LogicalType &type, const std::string &key)
static union_field_idx_t getInternalFieldIdx(union_field_idx_t idx)
static constexpr union_field_idx_t TAG_FIELD_IDX
Definition types.h:585
Definition ku_string.h:12
Definition types.h:108
offset_t offset
Definition types.h:109