Kùzu C++ API
Loading...
Searching...
No Matches
null_mask.h
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <memory>
5
6#include <span>
7
8namespace kuzu {
9namespace common {
10
11class ArrowNullMaskTree;
12class Serializer;
13class Deserializer;
14
// Lookup table of single-bit masks: entry k has only bit k set, for k in [0, 64).
// Written as shift expressions so every value is correct by construction.
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ONE[64] = {
    uint64_t{1} << 0, uint64_t{1} << 1, uint64_t{1} << 2, uint64_t{1} << 3,
    uint64_t{1} << 4, uint64_t{1} << 5, uint64_t{1} << 6, uint64_t{1} << 7,
    uint64_t{1} << 8, uint64_t{1} << 9, uint64_t{1} << 10, uint64_t{1} << 11,
    uint64_t{1} << 12, uint64_t{1} << 13, uint64_t{1} << 14, uint64_t{1} << 15,
    uint64_t{1} << 16, uint64_t{1} << 17, uint64_t{1} << 18, uint64_t{1} << 19,
    uint64_t{1} << 20, uint64_t{1} << 21, uint64_t{1} << 22, uint64_t{1} << 23,
    uint64_t{1} << 24, uint64_t{1} << 25, uint64_t{1} << 26, uint64_t{1} << 27,
    uint64_t{1} << 28, uint64_t{1} << 29, uint64_t{1} << 30, uint64_t{1} << 31,
    uint64_t{1} << 32, uint64_t{1} << 33, uint64_t{1} << 34, uint64_t{1} << 35,
    uint64_t{1} << 36, uint64_t{1} << 37, uint64_t{1} << 38, uint64_t{1} << 39,
    uint64_t{1} << 40, uint64_t{1} << 41, uint64_t{1} << 42, uint64_t{1} << 43,
    uint64_t{1} << 44, uint64_t{1} << 45, uint64_t{1} << 46, uint64_t{1} << 47,
    uint64_t{1} << 48, uint64_t{1} << 49, uint64_t{1} << 50, uint64_t{1} << 51,
    uint64_t{1} << 52, uint64_t{1} << 53, uint64_t{1} << 54, uint64_t{1} << 55,
    uint64_t{1} << 56, uint64_t{1} << 57, uint64_t{1} << 58, uint64_t{1} << 59,
    uint64_t{1} << 60, uint64_t{1} << 61, uint64_t{1} << 62, uint64_t{1} << 63,
};
// Lookup table of single-hole masks: entry k has every bit set except bit k, for k in [0, 64).
// Each entry is the bitwise complement of the matching single-bit mask.
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ZERO[64] = {
    ~(uint64_t{1} << 0), ~(uint64_t{1} << 1), ~(uint64_t{1} << 2), ~(uint64_t{1} << 3),
    ~(uint64_t{1} << 4), ~(uint64_t{1} << 5), ~(uint64_t{1} << 6), ~(uint64_t{1} << 7),
    ~(uint64_t{1} << 8), ~(uint64_t{1} << 9), ~(uint64_t{1} << 10), ~(uint64_t{1} << 11),
    ~(uint64_t{1} << 12), ~(uint64_t{1} << 13), ~(uint64_t{1} << 14), ~(uint64_t{1} << 15),
    ~(uint64_t{1} << 16), ~(uint64_t{1} << 17), ~(uint64_t{1} << 18), ~(uint64_t{1} << 19),
    ~(uint64_t{1} << 20), ~(uint64_t{1} << 21), ~(uint64_t{1} << 22), ~(uint64_t{1} << 23),
    ~(uint64_t{1} << 24), ~(uint64_t{1} << 25), ~(uint64_t{1} << 26), ~(uint64_t{1} << 27),
    ~(uint64_t{1} << 28), ~(uint64_t{1} << 29), ~(uint64_t{1} << 30), ~(uint64_t{1} << 31),
    ~(uint64_t{1} << 32), ~(uint64_t{1} << 33), ~(uint64_t{1} << 34), ~(uint64_t{1} << 35),
    ~(uint64_t{1} << 36), ~(uint64_t{1} << 37), ~(uint64_t{1} << 38), ~(uint64_t{1} << 39),
    ~(uint64_t{1} << 40), ~(uint64_t{1} << 41), ~(uint64_t{1} << 42), ~(uint64_t{1} << 43),
    ~(uint64_t{1} << 44), ~(uint64_t{1} << 45), ~(uint64_t{1} << 46), ~(uint64_t{1} << 47),
    ~(uint64_t{1} << 48), ~(uint64_t{1} << 49), ~(uint64_t{1} << 50), ~(uint64_t{1} << 51),
    ~(uint64_t{1} << 52), ~(uint64_t{1} << 53), ~(uint64_t{1} << 54), ~(uint64_t{1} << 55),
    ~(uint64_t{1} << 56), ~(uint64_t{1} << 57), ~(uint64_t{1} << 58), ~(uint64_t{1} << 59),
    ~(uint64_t{1} << 60), ~(uint64_t{1} << 61), ~(uint64_t{1} << 62), ~(uint64_t{1} << 63),
};
42
// Lookup table of low-bit masks: entry k has the k least-significant bits set, for k in [0, 64].
// Entry 0 is empty and entry 64 is all ones, hence 65 entries.
// Declared constexpr (like the single-bit tables) so it can be used in constant expressions.
constexpr uint64_t NULL_LOWER_MASKS[65] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff,
    0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff,
    0x1fffff, 0x3fffff, 0x7fffff, 0xffffff, 0x1ffffff, 0x3ffffff, 0x7ffffff, 0xfffffff, 0x1fffffff,
    0x3fffffff, 0x7fffffff, 0xffffffff, 0x1ffffffff, 0x3ffffffff, 0x7ffffffff, 0xfffffffff,
    0x1fffffffff, 0x3fffffffff, 0x7fffffffff, 0xffffffffff, 0x1ffffffffff, 0x3ffffffffff,
    0x7ffffffffff, 0xfffffffffff, 0x1fffffffffff, 0x3fffffffffff, 0x7fffffffffff, 0xffffffffffff,
    0x1ffffffffffff, 0x3ffffffffffff, 0x7ffffffffffff, 0xfffffffffffff, 0x1fffffffffffff,
    0x3fffffffffffff, 0x7fffffffffffff, 0xffffffffffffff, 0x1ffffffffffffff, 0x3ffffffffffffff,
    0x7ffffffffffffff, 0xfffffffffffffff, 0x1fffffffffffffff, 0x3fffffffffffffff,
    0x7fffffffffffffff, 0xffffffffffffffff};
// Lookup table of high-bit masks: entry k has the k most-significant bits set, for k in [0, 64].
// Entry 0 is empty and entry 64 is all ones, hence 65 entries.
// Declared constexpr (like the single-bit tables) so it can be used in constant expressions.
constexpr uint64_t NULL_HIGH_MASKS[65] = {0x0, 0x8000000000000000, 0xc000000000000000,
    0xe000000000000000, 0xf000000000000000, 0xf800000000000000, 0xfc00000000000000,
    0xfe00000000000000, 0xff00000000000000, 0xff80000000000000, 0xffc0000000000000,
    0xffe0000000000000, 0xfff0000000000000, 0xfff8000000000000, 0xfffc000000000000,
    0xfffe000000000000, 0xffff000000000000, 0xffff800000000000, 0xffffc00000000000,
    0xffffe00000000000, 0xfffff00000000000, 0xfffff80000000000, 0xfffffc0000000000,
    0xfffffe0000000000, 0xffffff0000000000, 0xffffff8000000000, 0xffffffc000000000,
    0xffffffe000000000, 0xfffffff000000000, 0xfffffff800000000, 0xfffffffc00000000,
    0xfffffffe00000000, 0xffffffff00000000, 0xffffffff80000000, 0xffffffffc0000000,
    0xffffffffe0000000, 0xfffffffff0000000, 0xfffffffff8000000, 0xfffffffffc000000,
    0xfffffffffe000000, 0xffffffffff000000, 0xffffffffff800000, 0xffffffffffc00000,
    0xffffffffffe00000, 0xfffffffffff00000, 0xfffffffffff80000, 0xfffffffffffc0000,
    0xfffffffffffe0000, 0xffffffffffff0000, 0xffffffffffff8000, 0xffffffffffffc000,
    0xffffffffffffe000, 0xfffffffffffff000, 0xfffffffffffff800, 0xfffffffffffffc00,
    0xfffffffffffffe00, 0xffffffffffffff00, 0xffffffffffffff80, 0xffffffffffffffc0,
    0xffffffffffffffe0, 0xfffffffffffffff0, 0xfffffffffffffff8, 0xfffffffffffffffc,
    0xfffffffffffffffe, 0xffffffffffffffff};
70
71class NullMask {
72public:
73 static constexpr uint64_t NO_NULL_ENTRY = 0;
74 static constexpr uint64_t ALL_NULL_ENTRY = ~uint64_t(NO_NULL_ENTRY);
75 static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY_LOG2 = 6;
76 static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY = (uint64_t)1 << NUM_BITS_PER_NULL_ENTRY_LOG2;
77 static constexpr uint64_t NUM_BYTES_PER_NULL_ENTRY = NUM_BITS_PER_NULL_ENTRY >> 3;
78
79 // For creating a managed null mask
80 explicit NullMask(uint64_t capacity) : mayContainNulls{false} {
81 auto numNullEntries = (capacity + NUM_BITS_PER_NULL_ENTRY - 1) / NUM_BITS_PER_NULL_ENTRY;
82 buffer = std::make_unique<uint64_t[]>(numNullEntries);
83 data = std::span(buffer.get(), numNullEntries);
84 std::fill(data.begin(), data.end(), NO_NULL_ENTRY);
85 }
86
87 // For creating a null mask using existing data
88 explicit NullMask(std::span<uint64_t> nullData, bool mayContainNulls)
89 : data{nullData}, buffer{}, mayContainNulls{mayContainNulls} {}
90
91 inline void setAllNonNull() {
92 if (!mayContainNulls) {
93 return;
94 }
95 std::fill(data.begin(), data.end(), NO_NULL_ENTRY);
96 mayContainNulls = false;
97 }
98 inline void setAllNull() {
99 std::fill(data.begin(), data.end(), ALL_NULL_ENTRY);
100 mayContainNulls = true;
101 }
102
103 inline bool hasNoNullsGuarantee() const { return !mayContainNulls; }
104 uint64_t countNulls() const;
105
106 static void setNull(uint64_t* nullEntries, uint32_t pos, bool isNull);
107 inline void setNull(uint32_t pos, bool isNull) {
108 setNull(data.data(), pos, isNull);
109 if (isNull) {
110 mayContainNulls = true;
111 }
112 }
113
114 static inline bool isNull(const uint64_t* nullEntries, uint32_t pos) {
115 auto [entryPos, bitPosInEntry] = getNullEntryAndBitPos(pos);
116 return nullEntries[entryPos] & NULL_BITMASKS_WITH_SINGLE_ONE[bitPosInEntry];
117 }
118
119 inline bool isNull(uint32_t pos) const { return isNull(data.data(), pos); }
120
121 // const because updates to the data must set mayContainNulls if any value
122 // becomes non-null
123 // Modifying the underlying data should be done with setNull or copyFromNullData
124 inline const uint64_t* getData() const { return data.data(); }
125
126 static inline uint64_t getNumNullEntries(uint64_t numNullBits) {
127 return (numNullBits >> NUM_BITS_PER_NULL_ENTRY_LOG2) +
128 ((numNullBits - (numNullBits << NUM_BITS_PER_NULL_ENTRY_LOG2)) == 0 ? 0 : 1);
129 }
130
131 // Copies bitpacked null flags from one buffer to another, starting at an arbitrary bit
132 // offset and preserving adjacent bits.
133 //
134 // returns true if we have copied a nullBit with value 1 (indicates a null value) to
135 // dstNullEntries.
136 static bool copyNullMask(const uint64_t* srcNullEntries, uint64_t srcOffset,
137 uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert = false);
138
139 inline bool copyFrom(const NullMask& nullMask, uint64_t srcOffset, uint64_t dstOffset,
140 uint64_t numBitsToCopy, bool invert = false) {
141 if (nullMask.hasNoNullsGuarantee()) {
142 setNullFromRange(dstOffset, numBitsToCopy, invert);
143 return invert;
144 } else {
145 return copyFromNullBits(nullMask.getData(), srcOffset, dstOffset, numBitsToCopy,
146 invert);
147 }
148 }
149 bool copyFromNullBits(const uint64_t* srcNullEntries, uint64_t srcOffset, uint64_t dstOffset,
150 uint64_t numBitsToCopy, bool invert = false);
151
152 // Sets the given number of bits to null (if isNull is true) or non-null (if isNull is false),
153 // starting at the offset
154 static void setNullRange(uint64_t* nullEntries, uint64_t offset, uint64_t numBitsToSet,
155 bool isNull);
156
157 void setNullFromRange(uint64_t offset, uint64_t numBitsToSet, bool isNull);
158
159 void resize(uint64_t capacity);
160
161 void operator|=(const NullMask& other);
162
163 // Fast calculation of the minimum and maximum null values
164 // (essentially just three states, all null, all non-null and some null)
165 static std::pair<bool, bool> getMinMax(const uint64_t* nullEntries, uint64_t numValues);
166
167private:
168 static inline std::pair<uint64_t, uint64_t> getNullEntryAndBitPos(uint64_t pos) {
169 auto nullEntryPos = pos >> NUM_BITS_PER_NULL_ENTRY_LOG2;
170 return std::make_pair(nullEntryPos,
171 pos - (nullEntryPos << NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2));
172 }
173
174 static bool copyUnaligned(const uint64_t* srcNullEntries, uint64_t srcOffset,
175 uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert = false);
176
177private:
178 std::span<uint64_t> data;
179 std::unique_ptr<uint64_t[]> buffer;
180 bool mayContainNulls;
181};
182
183} // namespace common
184} // namespace kuzu
Definition null_mask.h:71
void setAllNonNull()
Definition null_mask.h:91
bool copyFromNullBits(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
NullMask(std::span< uint64_t > nullData, bool mayContainNulls)
Definition null_mask.h:88
bool hasNoNullsGuarantee() const
Definition null_mask.h:103
static bool copyNullMask(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t *dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
void operator|=(const NullMask &other)
static std::pair< bool, bool > getMinMax(const uint64_t *nullEntries, uint64_t numValues)
void setAllNull()
Definition null_mask.h:98
NullMask(uint64_t capacity)
Definition null_mask.h:80
const uint64_t * getData() const
Definition null_mask.h:124
static constexpr uint64_t NUM_BYTES_PER_NULL_ENTRY
Definition null_mask.h:77
static bool isNull(const uint64_t *nullEntries, uint32_t pos)
Definition null_mask.h:114
static void setNullRange(uint64_t *nullEntries, uint64_t offset, uint64_t numBitsToSet, bool isNull)
bool isNull(uint32_t pos) const
Definition null_mask.h:119
static uint64_t getNumNullEntries(uint64_t numNullBits)
Definition null_mask.h:126
static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY
Definition null_mask.h:76
static void setNull(uint64_t *nullEntries, uint32_t pos, bool isNull)
void setNullFromRange(uint64_t offset, uint64_t numBitsToSet, bool isNull)
bool copyFrom(const NullMask &nullMask, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
Definition null_mask.h:139
void setNull(uint32_t pos, bool isNull)
Definition null_mask.h:107
uint64_t countNulls() const
static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY_LOG2
Definition null_mask.h:75
void resize(uint64_t capacity)
static constexpr uint64_t ALL_NULL_ENTRY
Definition null_mask.h:74
static constexpr uint64_t NO_NULL_ENTRY
Definition null_mask.h:73
const uint64_t NULL_LOWER_MASKS[65]
Definition null_mask.h:43
const uint64_t NULL_HIGH_MASKS[65]
Definition null_mask.h:53
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ZERO[64]
Definition null_mask.h:25
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ONE[64]
Definition null_mask.h:15
Definition array_utils.h:7