Kùzu C++ API
Loading...
Searching...
No Matches
constants.h
Go to the documentation of this file.
1#pragma once
2
3#include <array>
4#include <cstdint>
5#include <string_view>
6
7#include "array_utils.h"
8#include "types.h"
9
10namespace kuzu {
11namespace common {
12
// Version string, declared here and defined in another translation unit.
// NOTE(review): the exact format of the string is not visible from this header.
extern const char* KUZU_VERSION;
14
// Log2 of the vector capacity. The build can override the default (11) by defining
// KUZU_VECTOR_CAPACITY_LOG2; any value above 12 is rejected at compile time.
#define DEFAULT_VECTOR_CAPACITY_LOG_2 11
#ifndef KUZU_VECTOR_CAPACITY_LOG2
#define VECTOR_CAPACITY_LOG_2 DEFAULT_VECTOR_CAPACITY_LOG_2
#else
#define VECTOR_CAPACITY_LOG_2 KUZU_VECTOR_CAPACITY_LOG2
#endif
#if VECTOR_CAPACITY_LOG_2 > 12
#error "Vector capacity log2 should be less than or equal to 12"
#endif
// 2^VECTOR_CAPACITY_LOG_2, i.e. 2048 when the default log2 of 11 is in effect.
constexpr uint64_t DEFAULT_VECTOR_CAPACITY = static_cast<uint64_t>(1) << VECTOR_CAPACITY_LOG_2;
25
// NOTE(review): "HT" presumably stands for hash table; confirm against the users of this
// constant.
constexpr double DEFAULT_HT_LOAD_FACTOR = 1.5;

// This is the default time a thread sleeps when it needs to block,
// e.g., a worker thread waiting in the TaskScheduler.
// NOTE(review): the constant this comment documents is elided from this listing; the
// cross-reference index names it THREAD_SLEEP_TIME_WHEN_WAITING_IN_MICROS (constants.h:30).

// Default checkpoint wait timeout, in microseconds (5000000 us = 5 s).
constexpr uint64_t DEFAULT_CHECKPOINT_WAIT_TIMEOUT_IN_MICROS = 5000000;

// Note that some places use std::bit_ceil to calculate resizes,
// which won't work for values other than 2. If this is changed, those will need to be updated.
constexpr uint64_t CHUNK_RESIZE_RATIO = 2;
37
39 static constexpr char ANONYMOUS[] = "";
40 static constexpr char ID[] = "_ID";
41 static constexpr char LABEL[] = "_LABEL";
42 static constexpr char SRC[] = "_SRC";
43 static constexpr char DST[] = "_DST";
44 static constexpr char DIRECTION[] = "_DIRECTION";
45 static constexpr char LENGTH[] = "_LENGTH";
46 static constexpr char NODES[] = "_NODES";
47 static constexpr char RELS[] = "_RELS";
48 static constexpr char STAR[] = "*";
49 static constexpr char PLACE_HOLDER[] = "_PLACE_HOLDER";
50 static constexpr char MAP_KEY[] = "KEY";
51 static constexpr char MAP_VALUE[] = "VALUE";
52
53 static constexpr std::string_view ROW_OFFSET = "_row_offset";
54 static constexpr std::string_view SRC_OFFSET = "_src_offset";
55 static constexpr std::string_view DST_OFFSET = "_dst_offset";
56};
57
// Page size class, stored in a single byte.
// NOTE(review): the enumerator lines are elided from this listing. The names and their order
// below come from the cross-reference index (REGULAR_PAGE at constants.h:59, TEMP_PAGE at
// constants.h:60); any explicit initializers in the real header are not visible. Confirm
// against the original file before relying on the numeric values.
enum PageSizeClass : uint8_t {
    REGULAR_PAGE,
    TEMP_PAGE,
};
62
// Currently the system supports files with two different page sizes, which we refer to as
// PAGE_SIZE (the KUZU_PAGE_SIZE constant below) and TEMP_PAGE_SIZE. PAGE_SIZE is the default
// size of the page, which is the unit of read/write to the database files.
// The build can override the regular page size by defining KUZU_PAGE_SIZE_LOG2.
#ifdef KUZU_PAGE_SIZE_LOG2
static constexpr uint64_t PAGE_SIZE_LOG2 = KUZU_PAGE_SIZE_LOG2;
#else
static constexpr uint64_t PAGE_SIZE_LOG2 = 12; // Default to 4KB.
#endif
static constexpr uint64_t KUZU_PAGE_SIZE = static_cast<uint64_t>(1) << PAGE_SIZE_LOG2;
// Page size for files with large pages, e.g., temporary files that are used by operators that
// may require large amounts of memory.
static constexpr uint64_t TEMP_PAGE_SIZE_LOG2 = 18;
// 2^18 = 256KB. Declared constexpr (not merely const) to match the other page-size constants.
static constexpr uint64_t TEMP_PAGE_SIZE = static_cast<uint64_t>(1) << TEMP_PAGE_SIZE_LOG2;
76
78 // If a user does not specify a max size for BM, we by default set the max size of BM to
79 // maxPhyMemSize * DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM.
80 static constexpr double DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM = 0.8;
// The default max size for a VMRegion. The smaller 1GB limit is selected when __32BIT__ is
// defined; otherwise the limit is 8TB.
#ifdef __32BIT__
    static constexpr uint64_t DEFAULT_VM_REGION_MAX_SIZE = static_cast<uint64_t>(1) << 30; // (1GB)
#else
    static constexpr uint64_t DEFAULT_VM_REGION_MAX_SIZE = static_cast<uint64_t>(1) << 43; // (8TB)
#endif
87 static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 28; // (256MB)
88};
89
91 static constexpr char OVERFLOW_FILE_SUFFIX[] = ".ovf";
92 static constexpr char WAL_FILE_SUFFIX[] = ".wal";
93 static constexpr char SHADOWING_SUFFIX[] = ".shadow";
94 static constexpr char INDEX_FILE_SUFFIX[] = ".hindex";
95 static constexpr char CATALOG_FILE_NAME[] = "catalog.kz";
96 static constexpr char CATALOG_FILE_NAME_FOR_WAL[] = "catalog.shadow";
97 static constexpr char DATA_FILE_NAME[] = "data.kz";
98 static constexpr char METADATA_FILE_NAME[] = "metadata.kz";
99 static constexpr char METADATA_FILE_NAME_FOR_WAL[] = "metadata.shadow";
100 static constexpr char LOCK_FILE_NAME[] = ".lock";
101
102 // The number of pages that we add at one time when we need to grow a file.
103 static constexpr uint64_t PAGE_GROUP_SIZE_LOG2 = 10;
104 static constexpr uint64_t PAGE_GROUP_SIZE = static_cast<uint64_t>(1) << PAGE_GROUP_SIZE_LOG2;
105 static constexpr uint64_t PAGE_IDX_IN_GROUP_MASK =
106 (static_cast<uint64_t>(1) << PAGE_GROUP_SIZE_LOG2) - 1;
107
108 static constexpr uint64_t NODE_GROUP_SIZE_LOG2 = 17; // 64 * 2048 nodes per group
109 static constexpr uint64_t NODE_GROUP_SIZE = static_cast<uint64_t>(1) << NODE_GROUP_SIZE_LOG2;
110
111 static constexpr double PACKED_CSR_DENSITY = 0.8;
112 static constexpr double LEAF_HIGH_CSR_DENSITY = 1.0;
113 // The number of CSR lists in a leaf region.
114 static constexpr uint64_t CSR_LEAF_REGION_SIZE_LOG2 = 10;
115 static constexpr uint64_t CSR_LEAF_REGION_SIZE = static_cast<uint64_t>(1)
117
118 static constexpr uint64_t MAX_NUM_ROWS_IN_TABLE = static_cast<uint64_t>(1) << 62;
119
120 static constexpr char TEMP_SPILLING_FILE_NAME[] = ".tmp";
121};
122
123// Hash Index Configurations
125 static constexpr uint16_t SLOT_CAPACITY_BYTES = 256;
126};
127
129 // Initial size of buffer for CSV Reader.
130 static constexpr uint64_t INITIAL_BUFFER_SIZE = 16384;
131 // This means that we will usually read the entirety of the contents of the file we need for a
132 // block in one read request. It is also very small, which means we can parallelize small files
133 // efficiently.
134 static constexpr uint64_t PARALLEL_BLOCK_SIZE = INITIAL_BUFFER_SIZE / 2;
135
136 static constexpr const char* IGNORE_ERRORS_OPTION_NAME = "IGNORE_ERRORS";
137 static constexpr const char* BOOL_CSV_PARSING_OPTIONS[] = {"HEADER", "PARALLEL",
138 "LIST_UNBRACED", "AUTODETECT", "AUTO_DETECT", CopyConstants::IGNORE_ERRORS_OPTION_NAME};
139 static constexpr bool DEFAULT_CSV_HAS_HEADER = false;
140 static constexpr bool DEFAULT_CSV_PARALLEL = true;
141
142 // Default configuration for csv file parsing
143 static constexpr const char* STRING_CSV_PARSING_OPTIONS[] = {"ESCAPE", "DELIM", "DELIMITER",
144 "QUOTE"};
145 static constexpr char DEFAULT_CSV_ESCAPE_CHAR = '"';
146 static constexpr char DEFAULT_CSV_DELIMITER = ',';
147 static constexpr bool DEFAULT_CSV_ALLOW_UNBRACED_LIST = false;
148 static constexpr char DEFAULT_CSV_QUOTE_CHAR = '"';
149 static constexpr char DEFAULT_CSV_LIST_BEGIN_CHAR = '[';
150 static constexpr char DEFAULT_CSV_LIST_END_CHAR = ']';
151 static constexpr bool DEFAULT_IGNORE_ERRORS = false;
152 static constexpr bool DEFAULT_CSV_AUTO_DETECT = true;
153 static constexpr bool DEFAULT_CSV_SET_DIALECT = false;
154 static constexpr std::array DEFAULT_CSV_DELIMITER_SEARCH_SPACE = {',', ';', '\t', '|'};
155 static constexpr std::array DEFAULT_CSV_QUOTE_SEARCH_SPACE = {'"', '\''};
156 static constexpr std::array DEFAULT_CSV_ESCAPE_SEARCH_SPACE = {'"', '\\', '\''};
157 static constexpr uint64_t PANDAS_PARTITION_COUNT = 50 * DEFAULT_VECTOR_CAPACITY;
158
159 static constexpr const char* INT_CSV_PARSING_OPTIONS[] = {"SKIP", "SAMPLE_SIZE"};
160 static constexpr uint64_t DEFAULT_CSV_SKIP_NUM = 0;
161 static constexpr uint64_t DEFAULT_CSV_TYPE_DEDUCTION_SAMPLE_SIZE = 256;
162
163 // metadata columns used to populate CSV warnings
164 static constexpr std::array SHARED_WARNING_DATA_COLUMN_NAMES = {"blockIdx", "offsetInBlock",
165 "startByteOffset", "endByteOffset"};
170
171 static constexpr std::array CSV_SPECIFIC_WARNING_DATA_COLUMN_NAMES = {"fileIdx"};
173
174 static constexpr std::array CSV_WARNING_DATA_COLUMN_NAMES =
176 static constexpr std::array CSV_WARNING_DATA_COLUMN_TYPES =
181
183};
184
186 static constexpr double NON_EQUALITY_PREDICATE_SELECTIVITY = 0.1;
187 static constexpr double EQUALITY_PREDICATE_SELECTIVITY = 0.01;
188 static constexpr uint64_t BUILD_PENALTY = 2;
189 // Avoid doing probe to build SIP if we have to accumulate a probe side that is much bigger than
190 // build side. Also avoid doing build to probe SIP if probe side is not much bigger than build.
191 static constexpr uint64_t SIP_RATIO = 5;
192};
193
195 static constexpr uint64_t NUM_BYTES_FOR_PAYLOAD_IDX = 8;
197 static constexpr uint64_t MIN_LIMIT_RATIO_TO_REDUCE = 2;
198};
199
201 static constexpr uint64_t PARQUET_DEFINE_VALID = 65535;
202 static constexpr const char* PARQUET_MAGIC_WORDS = "PAR1";
203 // We limit the uncompressed page size to 100MB.
204 // The max size in Parquet is 2GB, but we choose a more conservative limit.
205 static constexpr uint64_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000;
206 // Dictionary pages must be below 2GB. Unlike data pages, there's only one dictionary page.
207 // For this reason we go with a much higher, but still a conservative upper bound of 1GB.
208 static constexpr uint64_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE = 1e9;
209 // The maximum size a key entry in an RLE page takes.
210 static constexpr uint64_t MAX_DICTIONARY_KEY_SIZE = sizeof(uint32_t);
211 // The size of encoding the string length.
212 static constexpr uint64_t STRING_LENGTH_SIZE = sizeof(uint32_t);
213 static constexpr uint64_t MAX_STRING_STATISTICS_SIZE = 10000;
214 static constexpr uint64_t PARQUET_INTERVAL_SIZE = 12;
215 static constexpr uint64_t PARQUET_UUID_SIZE = 16;
216};
217
219 static constexpr const char* DEFAULT_CSV_NEWLINE = "\n";
220 static constexpr const char* DEFAULT_NULL_STR = "";
221 static constexpr bool DEFAULT_FORCE_QUOTE = false;
222 static constexpr uint64_t DEFAULT_CSV_FLUSH_SIZE = 4096 * 8;
223};
224
226 static constexpr char SCHEMA_NAME[] = "schema.cypher";
227 static constexpr char COPY_NAME[] = "copy.cypher";
228};
229
231 static constexpr std::array WARNING_TABLE_COLUMN_NAMES{"query_id", "message", "file_path",
232 "line_number", "skipped_line_or_record"};
235 static constexpr uint64_t WARNING_TABLE_NUM_COLUMNS = WARNING_TABLE_COLUMN_NAMES.size();
236
238};
239
// Type name string for an attached Kùzu database.
// NOTE(review): inferred from the identifier; confirm where this string is matched.
static constexpr char ATTACHED_KUZU_DB_TYPE[] = "KUZU";

// Name string for the local database.
// NOTE(review): inferred from the identifier; confirm against callers.
static constexpr char LOCAL_DB_NAME[] = "local(kuzu)";

// Upper limit on decimal precision. Declared with `auto`, so its type is int.
constexpr auto DECIMAL_PRECISION_LIMIT = 38;

// Function name string for the JSON scan function.
static constexpr char SCAN_JSON_FUNC_NAME[] = "READ_JSON";
247
248} // namespace common
249} // namespace kuzu
#define VECTOR_CAPACITY_LOG_2
Definition constants.h:17
constexpr auto DECIMAL_PRECISION_LIMIT
Definition constants.h:244
constexpr uint64_t DEFAULT_VECTOR_CAPACITY
Definition constants.h:24
constexpr std::array< T, N1+N2 > arrayConcat(const std::array< T, N1 > &arr1, const std::array< T, N2 > &arr2)
Definition array_utils.h:9
constexpr uint64_t CHUNK_RESIZE_RATIO
Definition constants.h:36
constexpr double DEFAULT_HT_LOAD_FACTOR
Definition constants.h:26
uint32_t column_id_t
Definition types.h:38
constexpr uint64_t THREAD_SLEEP_TIME_WHEN_WAITING_IN_MICROS
Definition constants.h:30
const char * KUZU_VERSION
PageSizeClass
Definition constants.h:58
@ TEMP_PAGE
Definition constants.h:60
@ REGULAR_PAGE
Definition constants.h:59
constexpr uint64_t DEFAULT_CHECKPOINT_WAIT_TIMEOUT_IN_MICROS
Definition constants.h:32
Definition array_utils.h:7
Definition constants.h:77
static constexpr uint64_t DEFAULT_VM_REGION_MAX_SIZE
Definition constants.h:85
static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING
Definition constants.h:87
static constexpr double DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM
Definition constants.h:80
Definition constants.h:128
static constexpr const char * STRING_CSV_PARSING_OPTIONS[]
Definition constants.h:143
static constexpr char DEFAULT_CSV_DELIMITER
Definition constants.h:146
static constexpr uint64_t DEFAULT_CSV_SKIP_NUM
Definition constants.h:160
static constexpr std::array CSV_SPECIFIC_WARNING_DATA_COLUMN_NAMES
Definition constants.h:171
static constexpr bool DEFAULT_CSV_AUTO_DETECT
Definition constants.h:152
static constexpr std::array SHARED_WARNING_DATA_COLUMN_NAMES
Definition constants.h:164
static constexpr uint64_t PANDAS_PARTITION_COUNT
Definition constants.h:157
static constexpr std::array CSV_SPECIFIC_WARNING_DATA_COLUMN_TYPES
Definition constants.h:172
static constexpr uint64_t DEFAULT_CSV_TYPE_DEDUCTION_SAMPLE_SIZE
Definition constants.h:161
static constexpr const char * INT_CSV_PARSING_OPTIONS[]
Definition constants.h:159
static constexpr std::array DEFAULT_CSV_QUOTE_SEARCH_SPACE
Definition constants.h:155
static constexpr char DEFAULT_CSV_QUOTE_CHAR
Definition constants.h:148
static constexpr char DEFAULT_CSV_LIST_BEGIN_CHAR
Definition constants.h:149
static constexpr bool DEFAULT_CSV_ALLOW_UNBRACED_LIST
Definition constants.h:147
static constexpr const char * BOOL_CSV_PARSING_OPTIONS[]
Definition constants.h:137
static constexpr bool DEFAULT_CSV_SET_DIALECT
Definition constants.h:153
static constexpr column_id_t CSV_WARNING_DATA_NUM_COLUMNS
Definition constants.h:178
static constexpr std::array CSV_WARNING_DATA_COLUMN_TYPES
Definition constants.h:176
static constexpr bool DEFAULT_IGNORE_ERRORS
Definition constants.h:151
static constexpr column_id_t MAX_NUM_WARNING_DATA_COLUMNS
Definition constants.h:182
static constexpr std::array DEFAULT_CSV_DELIMITER_SEARCH_SPACE
Definition constants.h:154
static constexpr const char * IGNORE_ERRORS_OPTION_NAME
Definition constants.h:136
static constexpr uint64_t PARALLEL_BLOCK_SIZE
Definition constants.h:134
static constexpr std::array SHARED_WARNING_DATA_COLUMN_TYPES
Definition constants.h:166
static constexpr column_id_t SHARED_WARNING_DATA_NUM_COLUMNS
Definition constants.h:168
static constexpr bool DEFAULT_CSV_HAS_HEADER
Definition constants.h:139
static constexpr bool DEFAULT_CSV_PARALLEL
Definition constants.h:140
static constexpr char DEFAULT_CSV_LIST_END_CHAR
Definition constants.h:150
static constexpr uint64_t INITIAL_BUFFER_SIZE
Definition constants.h:130
static constexpr char DEFAULT_CSV_ESCAPE_CHAR
Definition constants.h:145
static constexpr std::array CSV_WARNING_DATA_COLUMN_NAMES
Definition constants.h:174
static constexpr std::array DEFAULT_CSV_ESCAPE_SEARCH_SPACE
Definition constants.h:156
Definition constants.h:218
static constexpr const char * DEFAULT_CSV_NEWLINE
Definition constants.h:219
static constexpr uint64_t DEFAULT_CSV_FLUSH_SIZE
Definition constants.h:222
static constexpr const char * DEFAULT_NULL_STR
Definition constants.h:220
static constexpr bool DEFAULT_FORCE_QUOTE
Definition constants.h:221
Definition constants.h:124
static constexpr uint16_t SLOT_CAPACITY_BYTES
Definition constants.h:125
Definition constants.h:225
static constexpr char COPY_NAME[]
Definition constants.h:227
static constexpr char SCHEMA_NAME[]
Definition constants.h:226
Definition constants.h:38
static constexpr char STAR[]
Definition constants.h:48
static constexpr char LENGTH[]
Definition constants.h:45
static constexpr char PLACE_HOLDER[]
Definition constants.h:49
static constexpr std::string_view DST_OFFSET
Definition constants.h:55
static constexpr char RELS[]
Definition constants.h:47
static constexpr char DIRECTION[]
Definition constants.h:44
static constexpr char ID[]
Definition constants.h:40
static constexpr std::string_view SRC_OFFSET
Definition constants.h:54
static constexpr char DST[]
Definition constants.h:43
static constexpr std::string_view ROW_OFFSET
Definition constants.h:53
static constexpr char MAP_KEY[]
Definition constants.h:50
static constexpr char MAP_VALUE[]
Definition constants.h:51
static constexpr char SRC[]
Definition constants.h:42
static constexpr char LABEL[]
Definition constants.h:41
static constexpr char NODES[]
Definition constants.h:46
static constexpr char ANONYMOUS[]
Definition constants.h:39
Definition constants.h:194
static constexpr uint64_t MIN_SIZE_TO_REDUCE
Definition constants.h:196
static constexpr uint64_t MIN_LIMIT_RATIO_TO_REDUCE
Definition constants.h:197
static constexpr uint64_t NUM_BYTES_FOR_PAYLOAD_IDX
Definition constants.h:195
Definition constants.h:200
static constexpr uint64_t PARQUET_DEFINE_VALID
Definition constants.h:201
static constexpr uint64_t PARQUET_UUID_SIZE
Definition constants.h:215
static constexpr uint64_t MAX_UNCOMPRESSED_PAGE_SIZE
Definition constants.h:205
static constexpr uint64_t STRING_LENGTH_SIZE
Definition constants.h:212
static constexpr uint64_t MAX_STRING_STATISTICS_SIZE
Definition constants.h:213
static constexpr uint64_t MAX_DICTIONARY_KEY_SIZE
Definition constants.h:210
static constexpr const char * PARQUET_MAGIC_WORDS
Definition constants.h:202
static constexpr uint64_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE
Definition constants.h:208
static constexpr uint64_t PARQUET_INTERVAL_SIZE
Definition constants.h:214
Definition constants.h:185
static constexpr double NON_EQUALITY_PREDICATE_SELECTIVITY
Definition constants.h:186
static constexpr double EQUALITY_PREDICATE_SELECTIVITY
Definition constants.h:187
static constexpr uint64_t BUILD_PENALTY
Definition constants.h:188
static constexpr uint64_t SIP_RATIO
Definition constants.h:191
Definition constants.h:90
static constexpr char CATALOG_FILE_NAME_FOR_WAL[]
Definition constants.h:96
static constexpr char CATALOG_FILE_NAME[]
Definition constants.h:95
static constexpr char METADATA_FILE_NAME_FOR_WAL[]
Definition constants.h:99
static constexpr char TEMP_SPILLING_FILE_NAME[]
Definition constants.h:120
static constexpr char WAL_FILE_SUFFIX[]
Definition constants.h:92
static constexpr char LOCK_FILE_NAME[]
Definition constants.h:100
static constexpr uint64_t MAX_NUM_ROWS_IN_TABLE
Definition constants.h:118
static constexpr uint64_t CSR_LEAF_REGION_SIZE
Definition constants.h:115
static constexpr char METADATA_FILE_NAME[]
Definition constants.h:98
static constexpr uint64_t PAGE_IDX_IN_GROUP_MASK
Definition constants.h:105
static constexpr char OVERFLOW_FILE_SUFFIX[]
Definition constants.h:91
static constexpr uint64_t CSR_LEAF_REGION_SIZE_LOG2
Definition constants.h:114
static constexpr char SHADOWING_SUFFIX[]
Definition constants.h:93
static constexpr uint64_t PAGE_GROUP_SIZE
Definition constants.h:104
static constexpr char DATA_FILE_NAME[]
Definition constants.h:97
static constexpr uint64_t NODE_GROUP_SIZE_LOG2
Definition constants.h:108
static constexpr char INDEX_FILE_SUFFIX[]
Definition constants.h:94
static constexpr double PACKED_CSR_DENSITY
Definition constants.h:111
static constexpr uint64_t PAGE_GROUP_SIZE_LOG2
Definition constants.h:103
static constexpr uint64_t NODE_GROUP_SIZE
Definition constants.h:109
static constexpr double LEAF_HIGH_CSR_DENSITY
Definition constants.h:112
Definition constants.h:230
static constexpr std::array WARNING_TABLE_COLUMN_DATA_TYPES
Definition constants.h:233
static constexpr std::array WARNING_TABLE_COLUMN_NAMES
Definition constants.h:231
static constexpr uint64_t WARNING_TABLE_NUM_COLUMNS
Definition constants.h:235