Kùzu C++ API
Loading...
Searching...
No Matches
value_vector.h
Go to the documentation of this file.
1#pragma once
2
3#include <utility>
4
5#include "assert.h"
6#include "cast.h"
7#include "constants.h"
8#include "copy_constructors.h"
9#include "data_chunk_state.h"
10#include "null_mask.h"
11#include "ku_string.h"
12#include "auxiliary_buffer.h"
13
14namespace kuzu {
15namespace common {
16
17class Value;
18
22 friend class ListVector;
23 friend class ListAuxiliaryBuffer;
24 friend class StructVector;
25 friend class StringVector;
26 friend class ArrowColumnVector;
27
28public:
29 explicit ValueVector(LogicalType dataType, storage::MemoryManager* memoryManager = nullptr,
30 std::shared_ptr<DataChunkState> dataChunkState = nullptr);
31 explicit ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager* memoryManager = nullptr)
32 : ValueVector(LogicalType(dataTypeID), memoryManager) {
33 KU_ASSERT(dataTypeID != LogicalTypeID::LIST);
34 }
35
37 ~ValueVector() = default;
38
39 template<class Func>
40 void forEachNonNull(Func&& func) const {
41 if (hasNoNullsGuarantee()) {
42 state->getSelVector().forEach(func);
43 } else {
44 state->getSelVector().forEach([&](auto i) {
45 if (!isNull(i)) {
46 func(i);
47 }
48 });
49 }
50 }
51
52 uint32_t countNonNull() const {
53 if (hasNoNullsGuarantee()) {
54 return state->getSelVector().getSelSize();
55 } else if (state->getSelVector().isUnfiltered() &&
56 state->getSelVector().getSelSize() == DEFAULT_VECTOR_CAPACITY) {
57 return nullMask.countNulls();
58 } else {
59 uint32_t count = 0;
60 forEachNonNull([&](auto) { count++; });
61 return count;
62 }
63 }
64
65 void setState(const std::shared_ptr<DataChunkState>& state_);
66
67 void setAllNull() { nullMask.setAllNull(); }
68 void setAllNonNull() { nullMask.setAllNonNull(); }
69 // On return true, there are no null. On return false, there may or may not be nulls.
70 bool hasNoNullsGuarantee() const { return nullMask.hasNoNullsGuarantee(); }
71 void setNullRange(uint32_t startPos, uint32_t len, bool value) {
72 nullMask.setNullFromRange(startPos, len, value);
73 }
74 const NullMask& getNullMask() const { return nullMask; }
75 void setNull(uint32_t pos, bool isNull);
76 uint8_t isNull(uint32_t pos) const { return nullMask.isNull(pos); }
78 state->getSelVectorUnsafe().setSelSize(1);
79 setNull(state->getSelVector()[0], true);
80 }
81
82 bool setNullFromBits(const uint64_t* srcNullEntries, uint64_t srcOffset, uint64_t dstOffset,
83 uint64_t numBitsToCopy, bool invert = false);
84
85 uint32_t getNumBytesPerValue() const { return numBytesPerValue; }
86
87 // TODO(Guodong): Rename this to getValueRef
88 template<typename T>
89 const T& getValue(uint32_t pos) const {
90 return ((T*)valueBuffer.get())[pos];
91 }
92 template<typename T>
93 T& getValue(uint32_t pos) {
94 return ((T*)valueBuffer.get())[pos];
95 }
96 template<typename T>
97 void setValue(uint32_t pos, T val);
98 // copyFromRowData assumes rowData is non-NULL.
99 void copyFromRowData(uint32_t pos, const uint8_t* rowData);
100 // copyToRowData assumes srcVectorData is non-NULL.
101 void copyToRowData(uint32_t pos, uint8_t* rowData,
102 InMemOverflowBuffer* rowOverflowBuffer) const;
103 // copyFromVectorData assumes srcVectorData is non-NULL.
104 void copyFromVectorData(uint8_t* dstData, const ValueVector* srcVector,
105 const uint8_t* srcVectorData);
106 void copyFromVectorData(uint64_t dstPos, const ValueVector* srcVector, uint64_t srcPos);
107 void copyFromValue(uint64_t pos, const Value& value);
108
109 std::unique_ptr<Value> getAsValue(uint64_t pos) const;
110
111 uint8_t* getData() const { return valueBuffer.get(); }
112
113 offset_t readNodeOffset(uint32_t pos) const {
114 KU_ASSERT(dataType.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID);
115 return getValue<nodeID_t>(pos).offset;
116 }
117
119
120 // If there are still non-null values after discarding, return true. Otherwise, return false.
121 // For an unflat vector, its selection vector is also updated to the resultSelVector.
122 static bool discardNull(ValueVector& vector);
123
124 void serialize(Serializer& ser) const;
125 static std::unique_ptr<ValueVector> deSerialize(Deserializer& deSer, storage::MemoryManager* mm,
126 std::shared_ptr<DataChunkState> dataChunkState);
127
128private:
129 uint32_t getDataTypeSize(const LogicalType& type);
130 void initializeValueBuffer();
131
132public:
134 std::shared_ptr<DataChunkState> state;
135
136private:
137 std::unique_ptr<uint8_t[]> valueBuffer;
138 NullMask nullMask;
139 uint32_t numBytesPerValue;
140 std::unique_ptr<AuxiliaryBuffer> auxiliaryBuffer;
141};
142
144public:
146 KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
147 return ku_dynamic_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
148 ->getOverflowBuffer();
149 }
150
151 static void addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr);
152 static void addString(ValueVector* vector, uint32_t vectorPos, const char* srcStr,
153 uint64_t length);
154 static void addString(ValueVector* vector, uint32_t vectorPos, const std::string& srcStr);
155 // Add empty string with space reserved for the provided size
156 // Returned value can be modified to set the string contents
157 static ku_string_t& reserveString(ValueVector* vector, uint32_t vectorPos, uint64_t length);
158 static void reserveString(ValueVector* vector, ku_string_t& dstStr, uint64_t length);
159 static void addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr);
160 static void addString(ValueVector* vector, ku_string_t& dstStr, const char* srcStr,
161 uint64_t length);
162 static void addString(kuzu::common::ValueVector* vector, ku_string_t& dstStr,
163 const std::string& srcStr);
164 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
165 InMemOverflowBuffer* rowOverflowBuffer);
166};
167
169 static void addBlob(ValueVector* vector, uint32_t pos, const char* data, uint32_t length) {
170 StringVector::addString(vector, pos, data, length);
171 }
172 static void addBlob(ValueVector* vector, uint32_t pos, const uint8_t* data, uint64_t length) {
173 StringVector::addString(vector, pos, reinterpret_cast<const char*>(data), length);
174 }
175};
176
177// ListVector is used for both LIST and ARRAY physical type
179public:
180 static const ListAuxiliaryBuffer& getAuxBuffer(const ValueVector& vector) {
181 return vector.auxiliaryBuffer->constCast<ListAuxiliaryBuffer>();
182 }
184 return vector.auxiliaryBuffer->cast<ListAuxiliaryBuffer>();
185 }
186 // If you call setDataVector during initialize, there must be a followed up
187 // copyListEntryAndBufferMetaData at runtime.
188 // TODO(Xiyang): try to merge setDataVector & copyListEntryAndBufferMetaData
189 static void setDataVector(const ValueVector* vector, std::shared_ptr<ValueVector> dataVector) {
190 KU_ASSERT(validateType(*vector));
191 auto& listBuffer = getAuxBufferUnsafe(*vector);
192 listBuffer.setDataVector(std::move(dataVector));
193 }
194 static void copyListEntryAndBufferMetaData(ValueVector& vector, const ValueVector& other);
195 static ValueVector* getDataVector(const ValueVector* vector) {
196 KU_ASSERT(validateType(*vector));
197 return getAuxBuffer(*vector).getDataVector();
198 }
199 static std::shared_ptr<ValueVector> getSharedDataVector(const ValueVector* vector) {
200 KU_ASSERT(validateType(*vector));
201 return getAuxBuffer(*vector).getSharedDataVector();
202 }
203 static uint64_t getDataVectorSize(const ValueVector* vector) {
204 KU_ASSERT(validateType(*vector));
205 return getAuxBuffer(*vector).getSize();
206 }
207 static uint8_t* getListValues(const ValueVector* vector, const list_entry_t& listEntry) {
208 KU_ASSERT(validateType(*vector));
209 auto dataVector = getDataVector(vector);
210 return dataVector->getData() + dataVector->getNumBytesPerValue() * listEntry.offset;
211 }
212 static uint8_t* getListValuesWithOffset(const ValueVector* vector,
213 const list_entry_t& listEntry, offset_t elementOffsetInList) {
214 KU_ASSERT(validateType(*vector));
215 return getListValues(vector, listEntry) +
216 elementOffsetInList * getDataVector(vector)->getNumBytesPerValue();
217 }
218 static list_entry_t addList(ValueVector* vector, uint64_t listSize) {
219 KU_ASSERT(validateType(*vector));
220 return getAuxBufferUnsafe(*vector).addList(listSize);
221 }
222 static void resizeDataVector(ValueVector* vector, uint64_t numValues) {
223 KU_ASSERT(validateType(*vector));
224 getAuxBufferUnsafe(*vector).resize(numValues);
225 }
226
227 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
228 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
229 InMemOverflowBuffer* rowOverflowBuffer);
230 static void copyFromVectorData(ValueVector* dstVector, uint8_t* dstData,
231 const ValueVector* srcVector, const uint8_t* srcData);
232 static void appendDataVector(ValueVector* dstVector, ValueVector* srcDataVector,
233 uint64_t numValuesToAppend);
234 static void sliceDataVector(ValueVector* vectorToSlice, uint64_t offset, uint64_t numValues);
235
236private:
237 static bool validateType(const ValueVector& vector) {
238 switch (vector.dataType.getPhysicalType()) {
239 case PhysicalTypeID::LIST:
240 case PhysicalTypeID::ARRAY:
241 return true;
242 default:
243 return false;
244 }
245 }
246};
247
249public:
250 static const std::vector<std::shared_ptr<ValueVector>>& getFieldVectors(
251 const ValueVector* vector) {
252 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
253 ->getFieldVectors();
254 }
255
256 static std::shared_ptr<ValueVector> getFieldVector(const ValueVector* vector,
257 struct_field_idx_t idx) {
258 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
259 ->getFieldVectorShared(idx);
260 }
261
262 static ValueVector* getFieldVectorRaw(const ValueVector& vector, const std::string& fieldName) {
263 auto idx = StructType::getFieldIdx(vector.dataType, fieldName);
264 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector.auxiliaryBuffer.get())
265 ->getFieldVectorPtr(idx);
266 }
267
269 std::shared_ptr<ValueVector> vectorToReference) {
270 ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
271 ->referenceChildVector(idx, std::move(vectorToReference));
272 }
273
274 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
275 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
276 InMemOverflowBuffer* rowOverflowBuffer);
277 static void copyFromVectorData(ValueVector* dstVector, const uint8_t* dstData,
278 const ValueVector* srcVector, const uint8_t* srcData);
279};
280
282public:
287
288 static inline ValueVector* getValVector(const ValueVector* vector, union_field_idx_t fieldIdx) {
290 return StructVector::getFieldVector(vector, UnionType::getInternalFieldIdx(fieldIdx)).get();
291 }
292
293 static inline void referenceVector(ValueVector* vector, union_field_idx_t fieldIdx,
294 std::shared_ptr<ValueVector> vectorToReference) {
296 std::move(vectorToReference));
297 }
298
299 static inline void setTagField(ValueVector* vector, union_field_idx_t tag) {
301 for (auto i = 0u; i < vector->state->getSelVector().getSelSize(); i++) {
302 vector->setValue<struct_field_idx_t>(vector->state->getSelVector()[i], tag);
303 }
304 }
305};
306
308public:
309 static inline ValueVector* getKeyVector(const ValueVector* vector) {
310 return StructVector::getFieldVector(ListVector::getDataVector(vector), 0 /* keyVectorPos */)
311 .get();
312 }
313
314 static inline ValueVector* getValueVector(const ValueVector* vector) {
315 return StructVector::getFieldVector(ListVector::getDataVector(vector), 1 /* valVectorPos */)
316 .get();
317 }
318
319 static inline uint8_t* getMapKeys(const ValueVector* vector, const list_entry_t& listEntry) {
320 auto keyVector = getKeyVector(vector);
321 return keyVector->getData() + keyVector->getNumBytesPerValue() * listEntry.offset;
322 }
323
324 static inline uint8_t* getMapValues(const ValueVector* vector, const list_entry_t& listEntry) {
325 auto valueVector = getValueVector(vector);
326 return valueVector->getData() + valueVector->getNumBytesPerValue() * listEntry.offset;
327 }
328};
329
330} // namespace common
331} // namespace kuzu
#define KUZU_API
Definition api.h:25
#define KU_ASSERT(condition)
Definition assert.h:19
Definition in_mem_overflow_buffer.h:32
Definition auxiliary_buffer.h:79
Definition value_vector.h:178
static void setDataVector(const ValueVector *vector, std::shared_ptr< ValueVector > dataVector)
Definition value_vector.h:189
static void sliceDataVector(ValueVector *vectorToSlice, uint64_t offset, uint64_t numValues)
static void copyFromVectorData(ValueVector *dstVector, uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void appendDataVector(ValueVector *dstVector, ValueVector *srcDataVector, uint64_t numValuesToAppend)
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static list_entry_t addList(ValueVector *vector, uint64_t listSize)
Definition value_vector.h:218
static const ListAuxiliaryBuffer & getAuxBuffer(const ValueVector &vector)
Definition value_vector.h:180
static void resizeDataVector(ValueVector *vector, uint64_t numValues)
Definition value_vector.h:222
static void copyListEntryAndBufferMetaData(ValueVector &vector, const ValueVector &other)
static ListAuxiliaryBuffer & getAuxBufferUnsafe(const ValueVector &vector)
Definition value_vector.h:183
static uint8_t * getListValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:207
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static uint64_t getDataVectorSize(const ValueVector *vector)
Definition value_vector.h:203
static ValueVector * getDataVector(const ValueVector *vector)
Definition value_vector.h:195
static uint8_t * getListValuesWithOffset(const ValueVector *vector, const list_entry_t &listEntry, offset_t elementOffsetInList)
Definition value_vector.h:212
static std::shared_ptr< ValueVector > getSharedDataVector(const ValueVector *vector)
Definition value_vector.h:199
Definition types.h:249
KUZU_API LogicalTypeID getLogicalTypeID() const
Definition types.h:272
KUZU_API PhysicalTypeID getPhysicalType() const
Definition types.h:276
Definition value_vector.h:307
static ValueVector * getValueVector(const ValueVector *vector)
Definition value_vector.h:314
static uint8_t * getMapKeys(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:319
static uint8_t * getMapValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:324
static ValueVector * getKeyVector(const ValueVector *vector)
Definition value_vector.h:309
Definition null_mask.h:71
Definition value_vector.h:143
static void reserveString(ValueVector *vector, ku_string_t &dstStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, ku_string_t &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, uint32_t vectorPos, const std::string &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, ku_string_t &srcStr)
static ku_string_t & reserveString(ValueVector *vector, uint32_t vectorPos, uint64_t length)
static InMemOverflowBuffer * getInMemOverflowBuffer(ValueVector *vector)
Definition value_vector.h:145
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static void addString(kuzu::common::ValueVector *vector, ku_string_t &dstStr, const std::string &srcStr)
Definition value_vector.h:248
static const std::vector< std::shared_ptr< ValueVector > > & getFieldVectors(const ValueVector *vector)
Definition value_vector.h:250
static void copyFromVectorData(ValueVector *dstVector, const uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static void referenceVector(ValueVector *vector, struct_field_idx_t idx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:268
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static ValueVector * getFieldVectorRaw(const ValueVector &vector, const std::string &fieldName)
Definition value_vector.h:262
static std::shared_ptr< ValueVector > getFieldVector(const ValueVector *vector, struct_field_idx_t idx)
Definition value_vector.h:256
Definition value_vector.h:281
static void setTagField(ValueVector *vector, union_field_idx_t tag)
Definition value_vector.h:299
static void referenceVector(ValueVector *vector, union_field_idx_t fieldIdx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:293
static ValueVector * getValVector(const ValueVector *vector, union_field_idx_t fieldIdx)
Definition value_vector.h:288
static ValueVector * getTagVector(const ValueVector *vector)
Definition value_vector.h:283
Definition value.h:26
Definition value_vector.h:21
static bool discardNull(ValueVector &vector)
uint8_t * getData() const
Definition value_vector.h:111
ValueVector(LogicalType dataType, storage::MemoryManager *memoryManager=nullptr, std::shared_ptr< DataChunkState > dataChunkState=nullptr)
void copyFromRowData(uint32_t pos, const uint8_t *rowData)
void copyToRowData(uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer) const
void forEachNonNull(Func &&func) const
Definition value_vector.h:40
void setState(const std::shared_ptr< DataChunkState > &state_)
void copyFromValue(uint64_t pos, const Value &value)
std::unique_ptr< Value > getAsValue(uint64_t pos) const
void setAllNonNull()
Definition value_vector.h:68
uint8_t isNull(uint32_t pos) const
Definition value_vector.h:76
void serialize(Serializer &ser) const
ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager *memoryManager=nullptr)
Definition value_vector.h:31
const T & getValue(uint32_t pos) const
Definition value_vector.h:89
LogicalType dataType
Definition value_vector.h:133
static std::unique_ptr< ValueVector > deSerialize(Deserializer &deSer, storage::MemoryManager *mm, std::shared_ptr< DataChunkState > dataChunkState)
void copyFromVectorData(uint64_t dstPos, const ValueVector *srcVector, uint64_t srcPos)
uint32_t countNonNull() const
Definition value_vector.h:52
DELETE_COPY_AND_MOVE(ValueVector)
const NullMask & getNullMask() const
Definition value_vector.h:74
bool hasNoNullsGuarantee() const
Definition value_vector.h:70
T & getValue(uint32_t pos)
Definition value_vector.h:93
void setAllNull()
Definition value_vector.h:67
uint32_t getNumBytesPerValue() const
Definition value_vector.h:85
void setAsSingleNullEntry()
Definition value_vector.h:77
void setValue(uint32_t pos, T val)
void copyFromVectorData(uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcVectorData)
offset_t readNodeOffset(uint32_t pos) const
Definition value_vector.h:113
bool setNullFromBits(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
void setNullRange(uint32_t startPos, uint32_t len, bool value)
Definition value_vector.h:71
void setNull(uint32_t pos, bool isNull)
std::shared_ptr< DataChunkState > state
Definition value_vector.h:134
constexpr uint64_t DEFAULT_VECTOR_CAPACITY
Definition constants.h:24
struct_field_idx_t union_field_idx_t
Definition types.h:49
LogicalTypeID
Definition types.h:170
uint64_t offset_t
Definition types.h:77
uint8_t struct_field_idx_t
Definition types.h:48
TO ku_dynamic_cast(FROM *old)
Definition cast.h:11
Definition array_utils.h:7
Definition value_vector.h:168
static void addBlob(ValueVector *vector, uint32_t pos, const uint8_t *data, uint64_t length)
Definition value_vector.h:172
static void addBlob(ValueVector *vector, uint32_t pos, const char *data, uint32_t length)
Definition value_vector.h:169
static struct_field_idx_t getFieldIdx(const LogicalType &type, const std::string &key)
static union_field_idx_t getInternalFieldIdx(union_field_idx_t idx)
static constexpr union_field_idx_t TAG_FIELD_IDX
Definition types.h:590
Definition ku_string.h:12
Definition types.h:111
offset_t offset
Definition types.h:112