Kùzu C++ API
Loading...
Searching...
No Matches
null_mask.h
Go to the documentation of this file.
1#pragma once
2
3#include <memory>
4
5#include <span>
6
7namespace kuzu {
8namespace common {
9
10class ArrowNullMaskTree;
11class Serializer;
12class Deserializer;
13
// NULL_BITMASKS_WITH_SINGLE_ONE[i] has only bit i set (i.e. 1 << i), for i in [0, 63].
// OR-ing an entry with it sets bit i; AND-ing tests bit i.
//
// Declared `inline constexpr` so that the header contributes a single definition shared by
// every translation unit; inline functions that index this table at runtime then refer to the
// same object everywhere instead of a per-translation-unit copy.
inline constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ONE[64] = {0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
    0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000, 0x20000, 0x40000, 0x80000,
    0x100000, 0x200000, 0x400000, 0x800000, 0x1000000, 0x2000000, 0x4000000, 0x8000000, 0x10000000,
    0x20000000, 0x40000000, 0x80000000, 0x100000000, 0x200000000, 0x400000000, 0x800000000,
    0x1000000000, 0x2000000000, 0x4000000000, 0x8000000000, 0x10000000000, 0x20000000000,
    0x40000000000, 0x80000000000, 0x100000000000, 0x200000000000, 0x400000000000, 0x800000000000,
    0x1000000000000, 0x2000000000000, 0x4000000000000, 0x8000000000000, 0x10000000000000,
    0x20000000000000, 0x40000000000000, 0x80000000000000, 0x100000000000000, 0x200000000000000,
    0x400000000000000, 0x800000000000000, 0x1000000000000000, 0x2000000000000000,
    0x4000000000000000, 0x8000000000000000};
// NULL_BITMASKS_WITH_SINGLE_ZERO[i] has every bit set except bit i (i.e. ~(1 << i)), for i in
// [0, 63]. AND-ing an entry with it clears bit i and leaves all other bits untouched.
//
// Declared `inline constexpr` so that the header contributes a single definition shared by
// every translation unit rather than one internal-linkage copy per translation unit.
inline constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ZERO[64] = {0xfffffffffffffffe, 0xfffffffffffffffd,
    0xfffffffffffffffb, 0xfffffffffffffff7, 0xffffffffffffffef, 0xffffffffffffffdf,
    0xffffffffffffffbf, 0xffffffffffffff7f, 0xfffffffffffffeff, 0xfffffffffffffdff,
    0xfffffffffffffbff, 0xfffffffffffff7ff, 0xffffffffffffefff, 0xffffffffffffdfff,
    0xffffffffffffbfff, 0xffffffffffff7fff, 0xfffffffffffeffff, 0xfffffffffffdffff,
    0xfffffffffffbffff, 0xfffffffffff7ffff, 0xffffffffffefffff, 0xffffffffffdfffff,
    0xffffffffffbfffff, 0xffffffffff7fffff, 0xfffffffffeffffff, 0xfffffffffdffffff,
    0xfffffffffbffffff, 0xfffffffff7ffffff, 0xffffffffefffffff, 0xffffffffdfffffff,
    0xffffffffbfffffff, 0xffffffff7fffffff, 0xfffffffeffffffff, 0xfffffffdffffffff,
    0xfffffffbffffffff, 0xfffffff7ffffffff, 0xffffffefffffffff, 0xffffffdfffffffff,
    0xffffffbfffffffff, 0xffffff7fffffffff, 0xfffffeffffffffff, 0xfffffdffffffffff,
    0xfffffbffffffffff, 0xfffff7ffffffffff, 0xffffefffffffffff, 0xffffdfffffffffff,
    0xffffbfffffffffff, 0xffff7fffffffffff, 0xfffeffffffffffff, 0xfffdffffffffffff,
    0xfffbffffffffffff, 0xfff7ffffffffffff, 0xffefffffffffffff, 0xffdfffffffffffff,
    0xffbfffffffffffff, 0xff7fffffffffffff, 0xfeffffffffffffff, 0xfdffffffffffffff,
    0xfbffffffffffffff, 0xf7ffffffffffffff, 0xefffffffffffffff, 0xdfffffffffffffff,
    0xbfffffffffffffff, 0x7fffffffffffffff};
41
// NULL_LOWER_MASKS[n] has the n least-significant bits set, for n in [0, 64]
// (index 0 is the empty mask, index 64 is all ones).
//
// `inline constexpr` matches the single-bit mask tables: the values are usable in constant
// expressions and the header contributes one shared definition across translation units.
inline constexpr uint64_t NULL_LOWER_MASKS[65] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff,
    0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff,
    0x1fffff, 0x3fffff, 0x7fffff, 0xffffff, 0x1ffffff, 0x3ffffff, 0x7ffffff, 0xfffffff, 0x1fffffff,
    0x3fffffff, 0x7fffffff, 0xffffffff, 0x1ffffffff, 0x3ffffffff, 0x7ffffffff, 0xfffffffff,
    0x1fffffffff, 0x3fffffffff, 0x7fffffffff, 0xffffffffff, 0x1ffffffffff, 0x3ffffffffff,
    0x7ffffffffff, 0xfffffffffff, 0x1fffffffffff, 0x3fffffffffff, 0x7fffffffffff, 0xffffffffffff,
    0x1ffffffffffff, 0x3ffffffffffff, 0x7ffffffffffff, 0xfffffffffffff, 0x1fffffffffffff,
    0x3fffffffffffff, 0x7fffffffffffff, 0xffffffffffffff, 0x1ffffffffffffff, 0x3ffffffffffffff,
    0x7ffffffffffffff, 0xfffffffffffffff, 0x1fffffffffffffff, 0x3fffffffffffffff,
    0x7fffffffffffffff, 0xffffffffffffffff};
// NULL_HIGH_MASKS[n] has the n most-significant bits set, for n in [0, 64]
// (index 0 is the empty mask, index 64 is all ones).
//
// `inline constexpr` matches the single-bit mask tables: the values are usable in constant
// expressions and the header contributes one shared definition across translation units.
inline constexpr uint64_t NULL_HIGH_MASKS[65] = {0x0, 0x8000000000000000, 0xc000000000000000,
    0xe000000000000000, 0xf000000000000000, 0xf800000000000000, 0xfc00000000000000,
    0xfe00000000000000, 0xff00000000000000, 0xff80000000000000, 0xffc0000000000000,
    0xffe0000000000000, 0xfff0000000000000, 0xfff8000000000000, 0xfffc000000000000,
    0xfffe000000000000, 0xffff000000000000, 0xffff800000000000, 0xffffc00000000000,
    0xffffe00000000000, 0xfffff00000000000, 0xfffff80000000000, 0xfffffc0000000000,
    0xfffffe0000000000, 0xffffff0000000000, 0xffffff8000000000, 0xffffffc000000000,
    0xffffffe000000000, 0xfffffff000000000, 0xfffffff800000000, 0xfffffffc00000000,
    0xfffffffe00000000, 0xffffffff00000000, 0xffffffff80000000, 0xffffffffc0000000,
    0xffffffffe0000000, 0xfffffffff0000000, 0xfffffffff8000000, 0xfffffffffc000000,
    0xfffffffffe000000, 0xffffffffff000000, 0xffffffffff800000, 0xffffffffffc00000,
    0xffffffffffe00000, 0xfffffffffff00000, 0xfffffffffff80000, 0xfffffffffffc0000,
    0xfffffffffffe0000, 0xffffffffffff0000, 0xffffffffffff8000, 0xffffffffffffc000,
    0xffffffffffffe000, 0xfffffffffffff000, 0xfffffffffffff800, 0xfffffffffffffc00,
    0xfffffffffffffe00, 0xffffffffffffff00, 0xffffffffffffff80, 0xffffffffffffffc0,
    0xffffffffffffffe0, 0xfffffffffffffff0, 0xfffffffffffffff8, 0xfffffffffffffffc,
    0xfffffffffffffffe, 0xffffffffffffffff};
69
70class NullMask {
71public:
72 static constexpr uint64_t NO_NULL_ENTRY = 0;
73 static constexpr uint64_t ALL_NULL_ENTRY = ~uint64_t(NO_NULL_ENTRY);
74 static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY_LOG2 = 6;
75 static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY = (uint64_t)1 << NUM_BITS_PER_NULL_ENTRY_LOG2;
76 static constexpr uint64_t NUM_BYTES_PER_NULL_ENTRY = NUM_BITS_PER_NULL_ENTRY >> 3;
77
78 // For creating a managed null mask
79 explicit NullMask(uint64_t capacity) : mayContainNulls{false} {
80 auto numNullEntries = (capacity + NUM_BITS_PER_NULL_ENTRY - 1) / NUM_BITS_PER_NULL_ENTRY;
81 buffer = std::make_unique<uint64_t[]>(numNullEntries);
82 data = std::span(buffer.get(), numNullEntries);
83 std::fill(data.begin(), data.end(), NO_NULL_ENTRY);
84 }
85
86 // For creating a null mask using existing data
87 explicit NullMask(std::span<uint64_t> nullData, bool mayContainNulls)
88 : data{nullData}, buffer{}, mayContainNulls{mayContainNulls} {}
89
90 inline void setAllNonNull() {
91 if (!mayContainNulls) {
92 return;
93 }
94 std::fill(data.begin(), data.end(), NO_NULL_ENTRY);
95 mayContainNulls = false;
96 }
97 inline void setAllNull() {
98 std::fill(data.begin(), data.end(), ALL_NULL_ENTRY);
99 mayContainNulls = true;
100 }
101
102 inline bool hasNoNullsGuarantee() const { return !mayContainNulls; }
103
104 static void setNull(uint64_t* nullEntries, uint32_t pos, bool isNull);
105 inline void setNull(uint32_t pos, bool isNull) {
106 setNull(data.data(), pos, isNull);
107 if (isNull) {
108 mayContainNulls = true;
109 }
110 }
111
112 static inline bool isNull(const uint64_t* nullEntries, uint32_t pos) {
113 auto [entryPos, bitPosInEntry] = getNullEntryAndBitPos(pos);
114 return nullEntries[entryPos] & NULL_BITMASKS_WITH_SINGLE_ONE[bitPosInEntry];
115 }
116
117 inline bool isNull(uint32_t pos) const { return isNull(data.data(), pos); }
118
119 // const because updates to the data must set mayContainNulls if any value
120 // becomes non-null
121 // Modifying the underlying data should be done with setNull or copyFromNullData
122 inline const uint64_t* getData() const { return data.data(); }
123
124 static inline uint64_t getNumNullEntries(uint64_t numNullBits) {
125 return (numNullBits >> NUM_BITS_PER_NULL_ENTRY_LOG2) +
126 ((numNullBits - (numNullBits << NUM_BITS_PER_NULL_ENTRY_LOG2)) == 0 ? 0 : 1);
127 }
128
129 // Copies bitpacked null flags from one buffer to another, starting at an arbitrary bit
130 // offset and preserving adjacent bits.
131 //
132 // returns true if we have copied a nullBit with value 1 (indicates a null value) to
133 // dstNullEntries.
134 static bool copyNullMask(const uint64_t* srcNullEntries, uint64_t srcOffset,
135 uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert = false);
136
137 inline bool copyFrom(const NullMask& nullMask, uint64_t srcOffset, uint64_t dstOffset,
138 uint64_t numBitsToCopy, bool invert = false) {
139 if (nullMask.hasNoNullsGuarantee()) {
140 setNullFromRange(dstOffset, numBitsToCopy, invert);
141 return invert;
142 } else {
143 return copyFromNullBits(nullMask.getData(), srcOffset, dstOffset, numBitsToCopy,
144 invert);
145 }
146 }
147 bool copyFromNullBits(const uint64_t* srcNullEntries, uint64_t srcOffset, uint64_t dstOffset,
148 uint64_t numBitsToCopy, bool invert = false);
149
150 // Sets the given number of bits to null (if isNull is true) or non-null (if isNull is false),
151 // starting at the offset
152 static void setNullRange(uint64_t* nullEntries, uint64_t offset, uint64_t numBitsToSet,
153 bool isNull);
154
155 void setNullFromRange(uint64_t offset, uint64_t numBitsToSet, bool isNull);
156
157 void resize(uint64_t capacity);
158
159 void operator|=(const NullMask& other);
160
161 // Fast calculation of the minimum and maximum null values
162 // (essentially just three states, all null, all non-null and some null)
163 static std::pair<bool, bool> getMinMax(const uint64_t* nullEntries, uint64_t numValues);
164
165private:
166 static inline std::pair<uint64_t, uint64_t> getNullEntryAndBitPos(uint64_t pos) {
167 auto nullEntryPos = pos >> NUM_BITS_PER_NULL_ENTRY_LOG2;
168 return std::make_pair(nullEntryPos,
169 pos - (nullEntryPos << NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2));
170 }
171
172 static bool copyUnaligned(const uint64_t* srcNullEntries, uint64_t srcOffset,
173 uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert = false);
174
175private:
176 std::span<uint64_t> data;
177 std::unique_ptr<uint64_t[]> buffer;
178 bool mayContainNulls;
179};
180
181} // namespace common
182} // namespace kuzu
Definition null_mask.h:70
void setAllNonNull()
Definition null_mask.h:90
bool copyFromNullBits(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
NullMask(std::span< uint64_t > nullData, bool mayContainNulls)
Definition null_mask.h:87
bool hasNoNullsGuarantee() const
Definition null_mask.h:102
static bool copyNullMask(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t *dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
void operator|=(const NullMask &other)
static std::pair< bool, bool > getMinMax(const uint64_t *nullEntries, uint64_t numValues)
void setAllNull()
Definition null_mask.h:97
NullMask(uint64_t capacity)
Definition null_mask.h:79
const uint64_t * getData() const
Definition null_mask.h:122
static constexpr uint64_t NUM_BYTES_PER_NULL_ENTRY
Definition null_mask.h:76
static bool isNull(const uint64_t *nullEntries, uint32_t pos)
Definition null_mask.h:112
static void setNullRange(uint64_t *nullEntries, uint64_t offset, uint64_t numBitsToSet, bool isNull)
bool isNull(uint32_t pos) const
Definition null_mask.h:117
static uint64_t getNumNullEntries(uint64_t numNullBits)
Definition null_mask.h:124
static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY
Definition null_mask.h:75
static void setNull(uint64_t *nullEntries, uint32_t pos, bool isNull)
void setNullFromRange(uint64_t offset, uint64_t numBitsToSet, bool isNull)
bool copyFrom(const NullMask &nullMask, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
Definition null_mask.h:137
void setNull(uint32_t pos, bool isNull)
Definition null_mask.h:105
static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY_LOG2
Definition null_mask.h:74
void resize(uint64_t capacity)
static constexpr uint64_t ALL_NULL_ENTRY
Definition null_mask.h:73
static constexpr uint64_t NO_NULL_ENTRY
Definition null_mask.h:72
const uint64_t NULL_LOWER_MASKS[65]
Definition null_mask.h:42
const uint64_t NULL_HIGH_MASKS[65]
Definition null_mask.h:52
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ZERO[64]
Definition null_mask.h:24
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ONE[64]
Definition null_mask.h:14
Definition array_utils.h:7