Kùzu C++ API
Loading...
Searching...
No Matches
value_vector.h
Go to the documentation of this file.
1#pragma once
2
3#include <numeric>
4#include <utility>
5
6#include "assert.h"
7#include "cast.h"
8#include "data_chunk_state.h"
9#include "null_mask.h"
10#include "ku_string.h"
11#include "auxiliary_buffer.h"
12
13namespace kuzu {
14namespace common {
15
16class Value;
17
21 friend class ListVector;
22 friend class ListAuxiliaryBuffer;
23 friend class StructVector;
24 friend class StringVector;
25 friend class ArrowColumnVector;
26
27public:
28 explicit ValueVector(LogicalType dataType, storage::MemoryManager* memoryManager = nullptr);
29 explicit ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager* memoryManager = nullptr)
30 : ValueVector(LogicalType(dataTypeID), memoryManager) {
31 KU_ASSERT(dataTypeID != LogicalTypeID::LIST);
32 }
33
34 ~ValueVector() = default;
35
36 void setState(const std::shared_ptr<DataChunkState>& state_);
37
38 void setAllNull() { nullMask.setAllNull(); }
39 void setAllNonNull() { nullMask.setAllNonNull(); }
40 // If this returns true, there are no nulls. If it returns false, there may or may not be nulls.
41 bool hasNoNullsGuarantee() const { return nullMask.hasNoNullsGuarantee(); }
42 void setNullRange(uint32_t startPos, uint32_t len, bool value) {
43 nullMask.setNullFromRange(startPos, len, value);
44 }
45 const NullMask& getNullMask() const { return nullMask; }
46 void setNull(uint32_t pos, bool isNull);
47 uint8_t isNull(uint32_t pos) const { return nullMask.isNull(pos); }
49 state->getSelVectorUnsafe().setSelSize(1);
50 setNull(state->getSelVector()[0], true);
51 }
52
53 bool setNullFromBits(const uint64_t* srcNullEntries, uint64_t srcOffset, uint64_t dstOffset,
54 uint64_t numBitsToCopy, bool invert = false);
55
56 uint32_t getNumBytesPerValue() const { return numBytesPerValue; }
57
58 // TODO(Guodong): Rename this to getValueRef
59 template<typename T>
60 const T& getValue(uint32_t pos) const {
61 return ((T*)valueBuffer.get())[pos];
62 }
63 template<typename T>
64 T& getValue(uint32_t pos) {
65 return ((T*)valueBuffer.get())[pos];
66 }
67 template<typename T>
68 void setValue(uint32_t pos, T val);
69 // copyFromRowData assumes rowData is non-NULL.
70 void copyFromRowData(uint32_t pos, const uint8_t* rowData);
71 // copyToRowData assumes the source data (this vector's value at pos) is non-NULL.
72 void copyToRowData(uint32_t pos, uint8_t* rowData,
73 InMemOverflowBuffer* rowOverflowBuffer) const;
74 // copyFromVectorData assumes srcVectorData is non-NULL.
75 void copyFromVectorData(uint8_t* dstData, const ValueVector* srcVector,
76 const uint8_t* srcVectorData);
77 void copyFromVectorData(uint64_t dstPos, const ValueVector* srcVector, uint64_t srcPos);
78 void copyFromValue(uint64_t pos, const Value& value);
79
80 std::unique_ptr<Value> getAsValue(uint64_t pos) const;
81
82 uint8_t* getData() const { return valueBuffer.get(); }
83
84 offset_t readNodeOffset(uint32_t pos) const {
85 KU_ASSERT(dataType.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID);
86 return getValue<nodeID_t>(pos).offset;
87 }
88
90
91 // If there are still non-null values after discarding, return true. Otherwise, return false.
92 // For an unflat vector, its selection vector is also updated to the resultSelVector.
93 static bool discardNull(ValueVector& vector);
94
95 void serialize(Serializer& ser) const;
96 static std::unique_ptr<ValueVector> deSerialize(Deserializer& deSer, storage::MemoryManager* mm,
97 std::shared_ptr<DataChunkState> dataChunkState);
98
99private:
100 uint32_t getDataTypeSize(const LogicalType& type);
101 void initializeValueBuffer();
102
103public:
105 std::shared_ptr<DataChunkState> state;
106
107private:
108 std::unique_ptr<uint8_t[]> valueBuffer;
109 NullMask nullMask;
110 uint32_t numBytesPerValue;
111 std::unique_ptr<AuxiliaryBuffer> auxiliaryBuffer;
112};
113
115public:
117 KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
118 return ku_dynamic_cast<AuxiliaryBuffer*, StringAuxiliaryBuffer*>(
119 vector->auxiliaryBuffer.get())
120 ->getOverflowBuffer();
121 }
122
123 static void addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr);
124 static void addString(ValueVector* vector, uint32_t vectorPos, const char* srcStr,
125 uint64_t length);
126 static void addString(ValueVector* vector, uint32_t vectorPos, const std::string& srcStr);
127 // Adds an empty string with space reserved for the provided length.
128 // The returned value can be modified to set the string contents.
129 static ku_string_t& reserveString(ValueVector* vector, uint32_t vectorPos, uint64_t length);
130 static void reserveString(ValueVector* vector, ku_string_t& dstStr, uint64_t length);
131 static void addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr);
132 static void addString(ValueVector* vector, ku_string_t& dstStr, const char* srcStr,
133 uint64_t length);
134 static void addString(kuzu::common::ValueVector* vector, ku_string_t& dstStr,
135 const std::string& srcStr);
136 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
137 InMemOverflowBuffer* rowOverflowBuffer);
138};
139
141 static void addBlob(ValueVector* vector, uint32_t pos, const char* data, uint32_t length) {
142 StringVector::addString(vector, pos, data, length);
143 }
144 static void addBlob(ValueVector* vector, uint32_t pos, const uint8_t* data, uint64_t length) {
145 StringVector::addString(vector, pos, reinterpret_cast<const char*>(data), length);
146 }
147};
148
149// ListVector is used for both the LIST and ARRAY physical types.
151public:
152 static const ListAuxiliaryBuffer& getAuxBuffer(const ValueVector& vector) {
153 return vector.auxiliaryBuffer->constCast<ListAuxiliaryBuffer>();
154 }
156 return vector.auxiliaryBuffer->cast<ListAuxiliaryBuffer>();
157 }
158 // If you call setDataVector during initialization, it must be followed by a call to
159 // copyListEntryAndBufferMetaData at runtime.
160 // TODO(Xiyang): try to merge setDataVector & copyListEntryAndBufferMetaData
161 static void setDataVector(const ValueVector* vector, std::shared_ptr<ValueVector> dataVector) {
162 KU_ASSERT(validateType(*vector));
163 auto& listBuffer = getAuxBufferUnsafe(*vector);
164 listBuffer.setDataVector(std::move(dataVector));
165 }
166 static void copyListEntryAndBufferMetaData(ValueVector& vector, const ValueVector& other);
167 static ValueVector* getDataVector(const ValueVector* vector) {
168 KU_ASSERT(validateType(*vector));
169 return getAuxBuffer(*vector).getDataVector();
170 }
171 static std::shared_ptr<ValueVector> getSharedDataVector(const ValueVector* vector) {
172 KU_ASSERT(validateType(*vector));
173 return getAuxBuffer(*vector).getSharedDataVector();
174 }
175 static uint64_t getDataVectorSize(const ValueVector* vector) {
176 KU_ASSERT(validateType(*vector));
177 return getAuxBuffer(*vector).getSize();
178 }
179 static uint8_t* getListValues(const ValueVector* vector, const list_entry_t& listEntry) {
180 KU_ASSERT(validateType(*vector));
181 auto dataVector = getDataVector(vector);
182 return dataVector->getData() + dataVector->getNumBytesPerValue() * listEntry.offset;
183 }
184 static uint8_t* getListValuesWithOffset(const ValueVector* vector,
185 const list_entry_t& listEntry, offset_t elementOffsetInList) {
186 KU_ASSERT(validateType(*vector));
187 return getListValues(vector, listEntry) +
188 elementOffsetInList * getDataVector(vector)->getNumBytesPerValue();
189 }
190 static list_entry_t addList(ValueVector* vector, uint64_t listSize) {
191 KU_ASSERT(validateType(*vector));
192 return getAuxBufferUnsafe(*vector).addList(listSize);
193 }
194 static void resizeDataVector(ValueVector* vector, uint64_t numValues) {
195 KU_ASSERT(validateType(*vector));
196 getAuxBufferUnsafe(*vector).resize(numValues);
197 }
198
199 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
200 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
201 InMemOverflowBuffer* rowOverflowBuffer);
202 static void copyFromVectorData(ValueVector* dstVector, uint8_t* dstData,
203 const ValueVector* srcVector, const uint8_t* srcData);
204 static void appendDataVector(ValueVector* dstVector, ValueVector* srcDataVector,
205 uint64_t numValuesToAppend);
206 static void sliceDataVector(ValueVector* vectorToSlice, uint64_t offset, uint64_t numValues);
207
208private:
209 static bool validateType(const ValueVector& vector) {
210 switch (vector.dataType.getPhysicalType()) {
211 case PhysicalTypeID::LIST:
212 case PhysicalTypeID::ARRAY:
213 return true;
214 default:
215 return false;
216 }
217 }
218};
219
221public:
222 static inline const std::vector<std::shared_ptr<ValueVector>>& getFieldVectors(
223 const ValueVector* vector) {
225 vector->auxiliaryBuffer.get())
226 ->getFieldVectors();
227 }
228
229 static inline std::shared_ptr<ValueVector> getFieldVector(const ValueVector* vector,
230 struct_field_idx_t idx) {
232 vector->auxiliaryBuffer.get())
233 ->getFieldVectors()[idx];
234 }
235
236 static inline void referenceVector(ValueVector* vector, struct_field_idx_t idx,
237 std::shared_ptr<ValueVector> vectorToReference) {
239 ->referenceChildVector(idx, std::move(vectorToReference));
240 }
241
242 static inline void initializeEntries(ValueVector* vector) {
243 std::iota(reinterpret_cast<int64_t*>(vector->getData()),
244 reinterpret_cast<int64_t*>(
245 vector->getData() + vector->getNumBytesPerValue() * DEFAULT_VECTOR_CAPACITY),
246 0);
247 }
248
249 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
250 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
251 InMemOverflowBuffer* rowOverflowBuffer);
252 static void copyFromVectorData(ValueVector* dstVector, const uint8_t* dstData,
253 const ValueVector* srcVector, const uint8_t* srcData);
254};
255
257public:
262
263 static inline ValueVector* getValVector(const ValueVector* vector, union_field_idx_t fieldIdx) {
265 return StructVector::getFieldVector(vector, UnionType::getInternalFieldIdx(fieldIdx)).get();
266 }
267
268 static inline void referenceVector(ValueVector* vector, union_field_idx_t fieldIdx,
269 std::shared_ptr<ValueVector> vectorToReference) {
271 std::move(vectorToReference));
272 }
273
274 static inline void setTagField(ValueVector* vector, union_field_idx_t tag) {
276 for (auto i = 0u; i < vector->state->getSelVector().getSelSize(); i++) {
277 vector->setValue<struct_field_idx_t>(vector->state->getSelVector()[i], tag);
278 }
279 }
280};
281
283public:
284 static inline ValueVector* getKeyVector(const ValueVector* vector) {
285 return StructVector::getFieldVector(ListVector::getDataVector(vector), 0 /* keyVectorPos */)
286 .get();
287 }
288
289 static inline ValueVector* getValueVector(const ValueVector* vector) {
290 return StructVector::getFieldVector(ListVector::getDataVector(vector), 1 /* valVectorPos */)
291 .get();
292 }
293
294 static inline uint8_t* getMapKeys(const ValueVector* vector, const list_entry_t& listEntry) {
295 auto keyVector = getKeyVector(vector);
296 return keyVector->getData() + keyVector->getNumBytesPerValue() * listEntry.offset;
297 }
298
299 static inline uint8_t* getMapValues(const ValueVector* vector, const list_entry_t& listEntry) {
300 auto valueVector = getValueVector(vector);
301 return valueVector->getData() + valueVector->getNumBytesPerValue() * listEntry.offset;
302 }
303};
304
306 static void addString(ValueVector* vector, sel_t pos, ku_string_t str);
307 static void addString(ValueVector* vector, sel_t pos, const char* str, uint32_t length);
308
309 template<typename T>
310 static void add(ValueVector* vector, sel_t pos, T val);
311};
312
313} // namespace common
314} // namespace kuzu
#define KUZU_API
Definition api.h:25
#define KU_ASSERT(condition)
Definition assert.h:19
Definition deserializer.h:15
Definition in_mem_overflow_buffer.h:30
Definition auxiliary_buffer.h:75
Definition value_vector.h:150
static void setDataVector(const ValueVector *vector, std::shared_ptr< ValueVector > dataVector)
Definition value_vector.h:161
static void sliceDataVector(ValueVector *vectorToSlice, uint64_t offset, uint64_t numValues)
static void copyFromVectorData(ValueVector *dstVector, uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void appendDataVector(ValueVector *dstVector, ValueVector *srcDataVector, uint64_t numValuesToAppend)
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static list_entry_t addList(ValueVector *vector, uint64_t listSize)
Definition value_vector.h:190
static const ListAuxiliaryBuffer & getAuxBuffer(const ValueVector &vector)
Definition value_vector.h:152
static void resizeDataVector(ValueVector *vector, uint64_t numValues)
Definition value_vector.h:194
static void copyListEntryAndBufferMetaData(ValueVector &vector, const ValueVector &other)
static ListAuxiliaryBuffer & getAuxBufferUnsafe(const ValueVector &vector)
Definition value_vector.h:155
static uint8_t * getListValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:179
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static uint64_t getDataVectorSize(const ValueVector *vector)
Definition value_vector.h:175
static ValueVector * getDataVector(const ValueVector *vector)
Definition value_vector.h:167
static uint8_t * getListValuesWithOffset(const ValueVector *vector, const list_entry_t &listEntry, offset_t elementOffsetInList)
Definition value_vector.h:184
static std::shared_ptr< ValueVector > getSharedDataVector(const ValueVector *vector)
Definition value_vector.h:171
Definition types.h:201
KUZU_API LogicalTypeID getLogicalTypeID() const
Definition types.h:224
KUZU_API PhysicalTypeID getPhysicalType() const
Definition types.h:227
Definition value_vector.h:282
static ValueVector * getValueVector(const ValueVector *vector)
Definition value_vector.h:289
static uint8_t * getMapKeys(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:294
static uint8_t * getMapValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:299
static ValueVector * getKeyVector(const ValueVector *vector)
Definition value_vector.h:284
Definition null_mask.h:70
Definition serializer.h:15
Definition value_vector.h:114
static void reserveString(ValueVector *vector, ku_string_t &dstStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, ku_string_t &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, uint32_t vectorPos, const std::string &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, ku_string_t &srcStr)
static ku_string_t & reserveString(ValueVector *vector, uint32_t vectorPos, uint64_t length)
static InMemOverflowBuffer * getInMemOverflowBuffer(ValueVector *vector)
Definition value_vector.h:116
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static void addString(kuzu::common::ValueVector *vector, ku_string_t &dstStr, const std::string &srcStr)
Definition value_vector.h:220
static const std::vector< std::shared_ptr< ValueVector > > & getFieldVectors(const ValueVector *vector)
Definition value_vector.h:222
static void copyFromVectorData(ValueVector *dstVector, const uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static void referenceVector(ValueVector *vector, struct_field_idx_t idx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:236
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static void initializeEntries(ValueVector *vector)
Definition value_vector.h:242
static std::shared_ptr< ValueVector > getFieldVector(const ValueVector *vector, struct_field_idx_t idx)
Definition value_vector.h:229
Definition value_vector.h:256
static void setTagField(ValueVector *vector, union_field_idx_t tag)
Definition value_vector.h:274
static void referenceVector(ValueVector *vector, union_field_idx_t fieldIdx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:268
static ValueVector * getValVector(const ValueVector *vector, union_field_idx_t fieldIdx)
Definition value_vector.h:263
static ValueVector * getTagVector(const ValueVector *vector)
Definition value_vector.h:258
Definition value.h:27
Definition value_vector.h:20
static bool discardNull(ValueVector &vector)
uint8_t * getData() const
Definition value_vector.h:82
void copyFromRowData(uint32_t pos, const uint8_t *rowData)
void copyToRowData(uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer) const
void setState(const std::shared_ptr< DataChunkState > &state_)
void copyFromValue(uint64_t pos, const Value &value)
std::unique_ptr< Value > getAsValue(uint64_t pos) const
void setAllNonNull()
Definition value_vector.h:39
uint8_t isNull(uint32_t pos) const
Definition value_vector.h:47
void serialize(Serializer &ser) const
ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager *memoryManager=nullptr)
Definition value_vector.h:29
const T & getValue(uint32_t pos) const
Definition value_vector.h:60
LogicalType dataType
Definition value_vector.h:104
static std::unique_ptr< ValueVector > deSerialize(Deserializer &deSer, storage::MemoryManager *mm, std::shared_ptr< DataChunkState > dataChunkState)
void copyFromVectorData(uint64_t dstPos, const ValueVector *srcVector, uint64_t srcPos)
const NullMask & getNullMask() const
Definition value_vector.h:45
bool hasNoNullsGuarantee() const
Definition value_vector.h:41
T & getValue(uint32_t pos)
Definition value_vector.h:64
void setAllNull()
Definition value_vector.h:38
uint32_t getNumBytesPerValue() const
Definition value_vector.h:56
void setAsSingleNullEntry()
Definition value_vector.h:48
void setValue(uint32_t pos, T val)
void copyFromVectorData(uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcVectorData)
offset_t readNodeOffset(uint32_t pos) const
Definition value_vector.h:84
bool setNullFromBits(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
ValueVector(LogicalType dataType, storage::MemoryManager *memoryManager=nullptr)
void setNullRange(uint32_t startPos, uint32_t len, bool value)
Definition value_vector.h:42
void setNull(uint32_t pos, bool isNull)
std::shared_ptr< DataChunkState > state
Definition value_vector.h:105
constexpr uint64_t DEFAULT_VECTOR_CAPACITY
Definition constants.h:12
struct_field_idx_t union_field_idx_t
Definition types.h:44
uint64_t sel_t
Definition types.h:24
TO ku_dynamic_cast(FROM old)
Definition cast.h:11
LogicalTypeID
Definition types.h:126
uint64_t offset_t
Definition internal_id_t.h:22
uint8_t struct_field_idx_t
Definition types.h:43
Definition alter_type.h:5
Definition value_vector.h:140
static void addBlob(ValueVector *vector, uint32_t pos, const uint8_t *data, uint64_t length)
Definition value_vector.h:144
static void addBlob(ValueVector *vector, uint32_t pos, const char *data, uint32_t length)
Definition value_vector.h:141
Definition value_vector.h:305
static void add(ValueVector *vector, sel_t pos, T val)
static void addString(ValueVector *vector, sel_t pos, const char *str, uint32_t length)
static void addString(ValueVector *vector, sel_t pos, ku_string_t str)
static union_field_idx_t getInternalFieldIdx(union_field_idx_t idx)
static constexpr union_field_idx_t TAG_FIELD_IDX
Definition types.h:505
Definition ku_string.h:12
Definition types.h:70
offset_t offset
Definition types.h:71