From 0a7b5bef94526fafb387fa9b9398d5f05d3b81b6 Mon Sep 17 00:00:00 2001
From: Wang Fenjin
Date: Mon, 12 Dec 2022 13:48:11 +0800
Subject: [PATCH] update to 0.6.1 (#104)

* update to 0.6.1

Change-Id: I60f15d0cecf67c9355620abe40c5cabd13daef93

* use release bin for san test

Change-Id: Id10d0e51ca4f27094d291226156ef93c127f7322
---
 .github/workflows/rust.yaml               |   21 +-
 Cargo.toml                                |    4 +-
 libduckdb-sys/Cargo.toml                  |    2 +-
 .../duckdb/bindgen_bundled_version.rs     | 1573 +++++-----
 libduckdb-sys/duckdb/duckdb.cpp           | 2533 +++++++++++------
 libduckdb-sys/duckdb/duckdb.h             |    2 +
 libduckdb-sys/duckdb/duckdb.hpp           | 2281 ++++++++-------
 libduckdb-sys/upgrade.sh                  |    2 +-
 src/types/from_sql.rs                     |   10 +-
 9 files changed, 3636 insertions(+), 2792 deletions(-)

diff --git a/.github/workflows/rust.yaml b/.github/workflows/rust.yaml
index ab89a96b..35358cd9 100644
--- a/.github/workflows/rust.yaml
+++ b/.github/workflows/rust.yaml
@@ -38,7 +38,7 @@ jobs:
       name: Download duckdb
       with:
         repository: "duckdb/duckdb"
-        tag: "v0.6.0"
+        tag: "v0.6.1"
         fileName: ${{ matrix.duckdb }}
         out-file-path: .
 
@@ -117,17 +117,34 @@ jobs:
       with:
         rust-version: nightly
         components: rust-src
+      # download libduckdb
+    - uses: robinraju/release-downloader@v1.4
+      name: Download duckdb
+      with:
+        repository: "duckdb/duckdb"
+        tag: "v0.6.1"
+        fileName: "libduckdb-linux-amd64.zip"
+        out-file-path: .
+    - name: Linux extract duckdb
+      uses: ihiroky/extract-action@v1
+      with:
+        file_path: ${{ github.workspace }}/libduckdb-linux-amd64.zip
+        extract_dir: libduckdb
     - name: Tests with asan
       env:
         RUSTFLAGS: -Zsanitizer=address
         RUSTDOCFLAGS: -Zsanitizer=address
         ASAN_OPTIONS: "detect_stack_use_after_return=1:detect_leaks=1"
+        DUCKDB_LIB_DIR: ${{ github.workspace }}/libduckdb
+        DUCKDB_INCLUDE_DIR: ${{ github.workspace }}/libduckdb
+        LD_LIBRARY_PATH: ${{ github.workspace }}/libduckdb
         # Work around https://github.com/rust-lang/rust/issues/59125 by
         # disabling backtraces. In an ideal world we'd probably suppress the
         # leak sanitization, but we don't care about backtraces here, so long
         # as the other tests have them.
         RUST_BACKTRACE: "0"
-      run: cargo -Z build-std test --features 'bundled' --features 'modern-full' --target x86_64-unknown-linux-gnu
+      # run: cargo -Z build-std test --features 'bundled' --features 'modern-full' --target x86_64-unknown-linux-gnu
+      run: cargo -Z build-std test --features 'modern-full' --target x86_64-unknown-linux-gnu
 
     - uses: wangfenjin/publish-crates@main
       name: cargo publish --dry-run
diff --git a/Cargo.toml b/Cargo.toml
index af186a2f..c62ef450 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "duckdb"
-version = "0.6.0"
+version = "0.6.1"
 authors = ["wangfenjin "]
 edition = "2021"
 description = "Ergonomic wrapper for DuckDB"
@@ -69,7 +69,7 @@ tempdir = "0.3.7"
 
 [dependencies.libduckdb-sys]
 path = "libduckdb-sys"
-version = "0.6.0"
+version = "0.6.1"
 
 [package.metadata.docs.rs]
 features = []
diff --git a/libduckdb-sys/Cargo.toml b/libduckdb-sys/Cargo.toml
index cbc8e07c..273dbbb2 100644
--- a/libduckdb-sys/Cargo.toml
+++ b/libduckdb-sys/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "libduckdb-sys"
-version = "0.6.0"
+version = "0.6.1"
 authors = ["wangfenjin "]
 edition = "2021"
 build = "build.rs"
diff --git a/libduckdb-sys/duckdb/bindgen_bundled_version.rs b/libduckdb-sys/duckdb/bindgen_bundled_version.rs
index 9c98afe9..c03c7551 100644
--- a/libduckdb-sys/duckdb/bindgen_bundled_version.rs
+++ b/libduckdb-sys/duckdb/bindgen_bundled_version.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.61.0 */
+/* automatically generated by rust-bindgen 0.63.0 */
 
 #[repr(C)]
 #[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
@@ -84,9 +84,9 @@ pub const DUCKDB_API_0_3_1: u32 = 1;
 pub const DUCKDB_API_0_3_2: u32 = 2;
 pub const DUCKDB_API_LATEST: u32 = 2;
 pub const DUCKDB_API_VERSION: u32 = 2;
-pub const __bool_true_false_are_defined: u32 = 1;
 pub const true_: u32 = 1;
 pub const false_: u32 = 0;
+pub const __bool_true_false_are_defined: u32 = 1;
 pub const __WORDSIZE: u32 = 64;
 pub const __DARWIN_ONLY_64_BIT_INO_T: u32 = 0;
 pub const __DARWIN_ONLY_UNIX_CONFORMANCE: u32 = 1;
@@ -162,6 +162,12 @@ pub const WINT_MAX: u32 = 2147483647;
 pub const SIG_ATOMIC_MIN: i32 = -2147483648;
 pub const SIG_ATOMIC_MAX: u32 = 2147483647;
 pub const __API_TO_BE_DEPRECATED: u32 = 100000;
+pub const __API_TO_BE_DEPRECATED_MACOS: u32 = 100000;
+pub const __API_TO_BE_DEPRECATED_IOS: u32 = 100000;
+pub const __API_TO_BE_DEPRECATED_TVOS: u32 = 100000;
+pub const __API_TO_BE_DEPRECATED_WATCHOS: u32 = 100000;
+pub const __API_TO_BE_DEPRECATED_MACCATALYST: u32 = 100000;
+pub const __API_TO_BE_DEPRECATED_DRIVERKIT: u32 = 100000;
 pub const __MAC_10_0: u32 = 1000;
 pub const __MAC_10_1: u32 = 1010;
 pub const __MAC_10_2: u32 = 1020;
@@ -205,6 +211,7 @@ pub const __MAC_12_0: u32 = 120000;
 pub const __MAC_12_1: u32 = 120100;
 pub const __MAC_12_2: u32 = 120200;
 pub const __MAC_12_3: u32 = 120300;
+pub const __MAC_13_0: u32 = 130000;
 pub const __IPHONE_2_0: u32 = 20000;
 pub const __IPHONE_2_1: u32 = 20100;
 pub const __IPHONE_2_2: u32 = 20200;
@@ -265,6 +272,8 @@ pub const __IPHONE_15_1: u32 = 150100;
 pub const __IPHONE_15_2: u32 = 150200;
 pub const __IPHONE_15_3: u32 = 150300;
 pub const __IPHONE_15_4: u32 = 150400;
+pub const __IPHONE_16_0: u32 = 160000;
+pub const __IPHONE_16_1: u32 = 160100;
 pub const __TVOS_9_0: u32 = 90000;
 pub const __TVOS_9_1: u32 = 90100;
 pub const __TVOS_9_2: u32 = 90200;
@@ -298,6 +307,8 @@ pub const __TVOS_15_1: u32 = 150100;
 pub const __TVOS_15_2: u32 = 150200;
 pub const __TVOS_15_3: u32 = 150300;
 pub const __TVOS_15_4: u32 = 150400;
+pub const __TVOS_16_0: u32 = 160000;
+pub const __TVOS_16_1: u32 = 160100;
 pub const __WATCHOS_1_0: u32 = 10000;
 pub const __WATCHOS_2_0: u32 = 20000;
 pub const __WATCHOS_2_1: u32 = 20100;
@@ -329,6 +340,8 @@ pub const __WATCHOS_8_1: u32 = 80100;
 pub const __WATCHOS_8_3: u32 = 80300;
 pub const __WATCHOS_8_4: u32 = 80400;
 pub const __WATCHOS_8_5: u32 = 80500;
+pub const __WATCHOS_9_0: u32 = 90000;
+pub const __WATCHOS_9_1: u32 = 90100;
 pub const MAC_OS_X_VERSION_10_0: u32 = 1000;
 pub const MAC_OS_X_VERSION_10_1: u32 = 1010;
 pub const MAC_OS_X_VERSION_10_2: u32 = 1020;
@@ -363,10 +376,11 @@ pub const MAC_OS_X_VERSION_10_15_1: u32 = 101501;
 pub const MAC_OS_X_VERSION_10_16: u32 = 101600;
 pub const MAC_OS_VERSION_11_0: u32 = 110000;
 pub const MAC_OS_VERSION_12_0: u32 = 120000;
+pub const MAC_OS_VERSION_13_0: u32 = 130000;
 pub const __DRIVERKIT_19_0: u32 = 190000;
 pub const __DRIVERKIT_20_0: u32 = 200000;
 pub const __DRIVERKIT_21_0: u32 = 210000;
-pub const __MAC_OS_X_VERSION_MAX_ALLOWED: u32 = 120300;
+pub const __MAC_OS_X_VERSION_MAX_ALLOWED: u32 = 130000;
 pub const __ENABLE_LEGACY_MAC_AVAILABILITY: u32 = 1;
 pub const __DARWIN_WCHAR_MIN: i32 = -2147483648;
 pub const _FORTIFY_SOURCE: u32 = 2;
@@ -504,7 +518,8 @@ pub const RUSAGE_INFO_V2: u32 = 2;
 pub const RUSAGE_INFO_V3: u32 = 3;
 pub const RUSAGE_INFO_V4: u32 = 4;
 pub const RUSAGE_INFO_V5: u32 = 5;
-pub const RUSAGE_INFO_CURRENT: u32 = 5;
+pub const RUSAGE_INFO_V6: u32 = 6;
+pub const RUSAGE_INFO_CURRENT: u32 = 6;
 pub const RU_PROC_RUNS_RESLIDE: u32 = 1;
 pub const RLIMIT_CPU: u32 = 0;
 pub const RLIMIT_FSIZE: u32 = 1;
@@ -538,6 +553,7 @@ pub const IOPOL_TYPE_VFS_IGNORE_CONTENT_PROTECTION: u32 = 6;
 pub const IOPOL_TYPE_VFS_IGNORE_PERMISSIONS: u32 = 7;
 pub const IOPOL_TYPE_VFS_SKIP_MTIME_UPDATE: u32 = 8;
 pub const IOPOL_TYPE_VFS_ALLOW_LOW_SPACE_WRITES: u32 = 9;
+pub const IOPOL_TYPE_VFS_DISALLOW_RW_FOR_O_EVTONLY: u32 = 10;
 pub const IOPOL_SCOPE_PROCESS: u32 = 0;
 pub const IOPOL_SCOPE_THREAD: u32 = 1;
 pub const IOPOL_SCOPE_DARWIN_BG: u32 = 2;
@@ -566,6 +582,8 @@ pub const IOPOL_VFS_SKIP_MTIME_UPDATE_OFF: u32 = 0;
 pub const IOPOL_VFS_SKIP_MTIME_UPDATE_ON: u32 = 1;
 pub const IOPOL_VFS_ALLOW_LOW_SPACE_WRITES_OFF: u32 = 0;
 pub const IOPOL_VFS_ALLOW_LOW_SPACE_WRITES_ON: u32 = 1;
+pub const IOPOL_VFS_DISALLOW_RW_FOR_O_EVTONLY_DEFAULT: u32 = 0;
+pub const IOPOL_VFS_DISALLOW_RW_FOR_O_EVTONLY_ON: u32 = 1;
 pub const WNOHANG: u32 = 1;
 pub const WUNTRACED: u32 = 2;
 pub const WCOREFLAG: u32 = 128;
@@ -9525,7 +9543,510 @@ fn bindgen_test_layout_rusage_info_v5() {
         )
     );
 }
-pub type rusage_info_current = rusage_info_v5;
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rusage_info_v6 {
+    pub ri_uuid: [u8; 16usize],
+    pub ri_user_time: u64,
+    pub ri_system_time: u64,
+    pub ri_pkg_idle_wkups: u64,
+    pub ri_interrupt_wkups: u64,
+    pub ri_pageins: u64,
+    pub ri_wired_size: u64,
+    pub ri_resident_size: u64,
+    pub ri_phys_footprint: u64,
+    pub ri_proc_start_abstime: u64,
+    pub ri_proc_exit_abstime: u64,
+    pub ri_child_user_time: u64,
+    pub ri_child_system_time: u64,
+    pub ri_child_pkg_idle_wkups: u64,
+    pub ri_child_interrupt_wkups: u64,
+    pub ri_child_pageins: u64,
+    pub ri_child_elapsed_abstime: u64,
+    pub ri_diskio_bytesread: u64,
+    pub ri_diskio_byteswritten: u64,
+    pub ri_cpu_time_qos_default: u64,
+    pub ri_cpu_time_qos_maintenance: u64,
+    pub ri_cpu_time_qos_background: u64,
+    pub ri_cpu_time_qos_utility: u64,
+    pub ri_cpu_time_qos_legacy: u64,
+    pub ri_cpu_time_qos_user_initiated: u64,
+    pub ri_cpu_time_qos_user_interactive: u64,
+    pub ri_billed_system_time: u64,
+
pub ri_serviced_system_time: u64, + pub ri_logical_writes: u64, + pub ri_lifetime_max_phys_footprint: u64, + pub ri_instructions: u64, + pub ri_cycles: u64, + pub ri_billed_energy: u64, + pub ri_serviced_energy: u64, + pub ri_interval_max_phys_footprint: u64, + pub ri_runnable_time: u64, + pub ri_flags: u64, + pub ri_user_ptime: u64, + pub ri_system_ptime: u64, + pub ri_pinstructions: u64, + pub ri_pcycles: u64, + pub ri_energy_nj: u64, + pub ri_penergy_nj: u64, + pub ri_reserved: [u64; 14usize], +} +#[test] +fn bindgen_test_layout_rusage_info_v6() { + const UNINIT: ::std::mem::MaybeUninit = ::std::mem::MaybeUninit::uninit(); + let ptr = UNINIT.as_ptr(); + assert_eq!( + ::std::mem::size_of::(), + 464usize, + concat!("Size of: ", stringify!(rusage_info_v6)) + ); + assert_eq!( + ::std::mem::align_of::(), + 8usize, + concat!("Alignment of ", stringify!(rusage_info_v6)) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_uuid) as usize - ptr as usize }, + 0usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_uuid) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_user_time) as usize - ptr as usize }, + 16usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_user_time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_system_time) as usize - ptr as usize }, + 24usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_system_time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_pkg_idle_wkups) as usize - ptr as usize }, + 32usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_pkg_idle_wkups) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_interrupt_wkups) as usize - ptr as usize }, + 40usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_interrupt_wkups) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_pageins) as usize - ptr as usize }, + 48usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_pageins) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_wired_size) as usize - ptr as usize }, + 56usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_wired_size) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_resident_size) as usize - ptr as usize }, + 64usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_resident_size) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_phys_footprint) as usize - ptr as usize }, + 72usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_phys_footprint) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_proc_start_abstime) as usize - ptr as usize }, + 80usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_proc_start_abstime) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_proc_exit_abstime) as usize - ptr as usize }, + 88usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_proc_exit_abstime) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_child_user_time) as usize - ptr as usize }, + 96usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_child_user_time) + ) + ); + assert_eq!( + unsafe { 
::std::ptr::addr_of!((*ptr).ri_child_system_time) as usize - ptr as usize }, + 104usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_child_system_time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_child_pkg_idle_wkups) as usize - ptr as usize }, + 112usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_child_pkg_idle_wkups) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_child_interrupt_wkups) as usize - ptr as usize }, + 120usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_child_interrupt_wkups) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_child_pageins) as usize - ptr as usize }, + 128usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_child_pageins) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_child_elapsed_abstime) as usize - ptr as usize }, + 136usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_child_elapsed_abstime) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_diskio_bytesread) as usize - ptr as usize }, + 144usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_diskio_bytesread) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_diskio_byteswritten) as usize - ptr as usize }, + 152usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_diskio_byteswritten) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cpu_time_qos_default) as usize - ptr as usize }, + 160usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cpu_time_qos_default) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cpu_time_qos_maintenance) as usize - ptr as usize }, + 168usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cpu_time_qos_maintenance) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cpu_time_qos_background) as usize - ptr as usize }, + 176usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cpu_time_qos_background) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cpu_time_qos_utility) as usize - ptr as usize }, + 184usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cpu_time_qos_utility) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cpu_time_qos_legacy) as usize - ptr as usize }, + 192usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cpu_time_qos_legacy) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cpu_time_qos_user_initiated) as usize - ptr as usize }, + 200usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cpu_time_qos_user_initiated) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cpu_time_qos_user_interactive) as usize - ptr as usize }, + 208usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cpu_time_qos_user_interactive) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_billed_system_time) as usize - ptr as usize }, + 216usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_billed_system_time) + ) + ); + assert_eq!( + unsafe { 
::std::ptr::addr_of!((*ptr).ri_serviced_system_time) as usize - ptr as usize }, + 224usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_serviced_system_time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_logical_writes) as usize - ptr as usize }, + 232usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_logical_writes) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_lifetime_max_phys_footprint) as usize - ptr as usize }, + 240usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_lifetime_max_phys_footprint) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_instructions) as usize - ptr as usize }, + 248usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_instructions) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_cycles) as usize - ptr as usize }, + 256usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_cycles) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_billed_energy) as usize - ptr as usize }, + 264usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_billed_energy) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_serviced_energy) as usize - ptr as usize }, + 272usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_serviced_energy) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_interval_max_phys_footprint) as usize - ptr as usize }, + 280usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_interval_max_phys_footprint) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_runnable_time) as usize - ptr as usize }, + 288usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_runnable_time) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_flags) as usize - ptr as usize }, + 296usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_flags) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_user_ptime) as usize - ptr as usize }, + 304usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_user_ptime) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_system_ptime) as usize - ptr as usize }, + 312usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_system_ptime) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_pinstructions) as usize - ptr as usize }, + 320usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_pinstructions) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_pcycles) as usize - ptr as usize }, + 328usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_pcycles) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_energy_nj) as usize - ptr as usize }, + 336usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_energy_nj) + ) + ); + assert_eq!( + unsafe { ::std::ptr::addr_of!((*ptr).ri_penergy_nj) as usize - ptr as usize }, + 344usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_penergy_nj) + ) + ); + assert_eq!( + 
unsafe { ::std::ptr::addr_of!((*ptr).ri_reserved) as usize - ptr as usize }, + 352usize, + concat!( + "Offset of field: ", + stringify!(rusage_info_v6), + "::", + stringify!(ri_reserved) + ) + ); +} +pub type rusage_info_current = rusage_info_v6; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct rlimit { @@ -10545,8 +11066,7 @@ pub const DUCKDB_TYPE_DUCKDB_TYPE_JSON: DUCKDB_TYPE = 28; pub const DUCKDB_TYPE_DUCKDB_TYPE_UNION: DUCKDB_TYPE = 29; pub type DUCKDB_TYPE = ::std::os::raw::c_uint; pub use self::DUCKDB_TYPE as duckdb_type; -#[doc = "! Days are stored as days since 1970-01-01"] -#[doc = "! Use the duckdb_from_date/duckdb_to_date function to extract individual information"] +#[doc = "! Days are stored as days since 1970-01-01\n! Use the duckdb_from_date/duckdb_to_date function to extract individual information"] #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct duckdb_date { @@ -10624,8 +11144,7 @@ fn bindgen_test_layout_duckdb_date_struct() { ) ); } -#[doc = "! Time is stored as microseconds since 00:00:00"] -#[doc = "! Use the duckdb_from_time/duckdb_to_time function to extract individual information"] +#[doc = "! Time is stored as microseconds since 00:00:00\n! Use the duckdb_from_time/duckdb_to_time function to extract individual information"] #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct duckdb_time { @@ -10714,8 +11233,7 @@ fn bindgen_test_layout_duckdb_time_struct() { ) ); } -#[doc = "! Timestamps are stored as microseconds since 1970-01-01"] -#[doc = "! Use the duckdb_from_timestamp/duckdb_to_timestamp function to extract individual information"] +#[doc = "! Timestamps are stored as microseconds since 1970-01-01\n! Use the duckdb_from_timestamp/duckdb_to_timestamp function to extract individual information"] #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct duckdb_timestamp { @@ -10834,9 +11352,7 @@ fn bindgen_test_layout_duckdb_interval() { ) ); } -#[doc = "! Hugeints are composed in a (lower, upper) component"] -#[doc = "! The value of the hugeint is upper * 2^64 + lower"] -#[doc = "! For easy usage, the functions duckdb_hugeint_to_double/duckdb_double_to_hugeint are recommended"] +#[doc = "! Hugeints are composed in a (lower, upper) component\n! The value of the hugeint is upper * 2^64 + lower\n! 
For easy usage, the functions duckdb_hugeint_to_double/duckdb_double_to_hugeint are recommended"] #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct duckdb_hugeint { @@ -11147,23 +11663,11 @@ pub const duckdb_pending_state_DUCKDB_PENDING_RESULT_NOT_READY: duckdb_pending_s pub const duckdb_pending_state_DUCKDB_PENDING_ERROR: duckdb_pending_state = 2; pub type duckdb_pending_state = ::std::os::raw::c_uint; extern "C" { - #[doc = "Creates a new database or opens an existing database file stored at the the given path."] - #[doc = "If no path is given a new in-memory database is created instead."] - #[doc = ""] - #[doc = " path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database."] - #[doc = " out_database: The result database object."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Creates a new database or opens an existing database file stored at the the given path.\nIf no path is given a new in-memory database is created instead.\n\n path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database.\n out_database: The result database object.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_open(path: *const ::std::os::raw::c_char, out_database: *mut duckdb_database) -> duckdb_state; } extern "C" { - #[doc = "Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the the given path."] - #[doc = ""] - #[doc = " path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database."] - #[doc = " out_database: The result database object."] - #[doc = " config: (Optional) configuration used to start up the database system."] - #[doc = " out_error: If set and the function returns DuckDBError, this will contain the reason why the start-up failed."] - #[doc = "Note that the error must be freed using `duckdb_free`."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Extended version of duckdb_open. Creates a new database or opens an existing database file stored at the the given path.\n\n path: Path to the database file on disk, or `nullptr` or `:memory:` to open an in-memory database.\n out_database: The result database object.\n config: (Optional) configuration used to start up the database system.\n out_error: If set and the function returns DuckDBError, this will contain the reason why the start-up failed.\nNote that the error must be freed using `duckdb_free`.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_open_ext( path: *const ::std::os::raw::c_char, out_database: *mut duckdb_database, @@ -11172,63 +11676,31 @@ extern "C" { ) -> duckdb_state; } extern "C" { - #[doc = "Closes the specified database and de-allocates all memory allocated for that database."] - #[doc = "This should be called after you are done with any database allocated through `duckdb_open`."] - #[doc = "Note that failing to call `duckdb_close` (in case of e.g. 
a program crash) will not cause data corruption."] - #[doc = "Still it is recommended to always correctly close a database object after you are done with it."] - #[doc = ""] - #[doc = " database: The database object to shut down."] + #[doc = "Closes the specified database and de-allocates all memory allocated for that database.\nThis should be called after you are done with any database allocated through `duckdb_open`.\nNote that failing to call `duckdb_close` (in case of e.g. a program crash) will not cause data corruption.\nStill it is recommended to always correctly close a database object after you are done with it.\n\n database: The database object to shut down."] pub fn duckdb_close(database: *mut duckdb_database); } extern "C" { - #[doc = "Opens a connection to a database. Connections are required to query the database, and store transactional state"] - #[doc = "associated with the connection."] - #[doc = ""] - #[doc = " database: The database file to connect to."] - #[doc = " out_connection: The result connection object."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Opens a connection to a database. Connections are required to query the database, and store transactional state\nassociated with the connection.\n\n database: The database file to connect to.\n out_connection: The result connection object.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_connect(database: duckdb_database, out_connection: *mut duckdb_connection) -> duckdb_state; } extern "C" { - #[doc = "Closes the specified connection and de-allocates all memory allocated for that connection."] - #[doc = ""] - #[doc = " connection: The connection to close."] + #[doc = "Closes the specified connection and de-allocates all memory allocated for that connection.\n\n connection: The connection to close."] pub fn duckdb_disconnect(connection: *mut duckdb_connection); } extern "C" { - #[doc = "Returns the version of the linked DuckDB, with a version postfix for dev versions"] - #[doc = ""] - #[doc = "Usually used for developing C extensions that must return this for a compatibility check."] + #[doc = "Returns the version of the linked DuckDB, with a version postfix for dev versions\n\nUsually used for developing C extensions that must return this for a compatibility check."] pub fn duckdb_library_version() -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = "Initializes an empty configuration object that can be used to provide start-up options for the DuckDB instance"] - #[doc = "through `duckdb_open_ext`."] - #[doc = ""] - #[doc = "This will always succeed unless there is a malloc failure."] - #[doc = ""] - #[doc = " out_config: The result configuration object."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Initializes an empty configuration object that can be used to provide start-up options for the DuckDB instance\nthrough `duckdb_open_ext`.\n\nThis will always succeed unless there is a malloc failure.\n\n out_config: The result configuration object.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_create_config(out_config: *mut duckdb_config) -> duckdb_state; } extern "C" { - #[doc = "This returns the total amount of configuration options available for usage with `duckdb_get_config_flag`."] - #[doc = ""] - #[doc = "This should not be called in a loop as it internally loops over all the options."] - #[doc = ""] - #[doc = " returns: The amount of config options 
available."] + #[doc = "This returns the total amount of configuration options available for usage with `duckdb_get_config_flag`.\n\nThis should not be called in a loop as it internally loops over all the options.\n\n returns: The amount of config options available."] pub fn duckdb_config_count() -> usize; } extern "C" { - #[doc = "Obtains a human-readable name and description of a specific configuration option. This can be used to e.g."] - #[doc = "display configuration options. This will succeed unless `index` is out of range (i.e. `>= duckdb_config_count`)."] - #[doc = ""] - #[doc = "The result name or description MUST NOT be freed."] - #[doc = ""] - #[doc = " index: The index of the configuration option (between 0 and `duckdb_config_count`)"] - #[doc = " out_name: A name of the configuration flag."] - #[doc = " out_description: A description of the configuration flag."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Obtains a human-readable name and description of a specific configuration option. This can be used to e.g.\ndisplay configuration options. This will succeed unless `index` is out of range (i.e. `>= duckdb_config_count`).\n\nThe result name or description MUST NOT be freed.\n\n index: The index of the configuration option (between 0 and `duckdb_config_count`)\n out_name: A name of the configuration flag.\n out_description: A description of the configuration flag.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_get_config_flag( index: usize, out_name: *mut *const ::std::os::raw::c_char, @@ -11236,17 +11708,7 @@ extern "C" { ) -> duckdb_state; } extern "C" { - #[doc = "Sets the specified option for the specified configuration. The configuration option is indicated by name."] - #[doc = "To obtain a list of config options, see `duckdb_get_config_flag`."] - #[doc = ""] - #[doc = "In the source code, configuration options are defined in `config.cpp`."] - #[doc = ""] - #[doc = "This can fail if either the name is invalid, or if the value provided for the option is invalid."] - #[doc = ""] - #[doc = " duckdb_config: The configuration object to set the option on."] - #[doc = " name: The name of the configuration flag to set."] - #[doc = " option: The value to set the configuration flag to."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Sets the specified option for the specified configuration. 
The configuration option is indicated by name.\nTo obtain a list of config options, see `duckdb_get_config_flag`.\n\nIn the source code, configuration options are defined in `config.cpp`.\n\nThis can fail if either the name is invalid, or if the value provided for the option is invalid.\n\n duckdb_config: The configuration object to set the option on.\n name: The name of the configuration flag to set.\n option: The value to set the configuration flag to.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_set_config( config: duckdb_config, name: *const ::std::os::raw::c_char, @@ -11254,23 +11716,11 @@ extern "C" { ) -> duckdb_state; } extern "C" { - #[doc = "Destroys the specified configuration option and de-allocates all memory allocated for the object."] - #[doc = ""] - #[doc = " config: The configuration object to destroy."] + #[doc = "Destroys the specified configuration option and de-allocates all memory allocated for the object.\n\n config: The configuration object to destroy."] pub fn duckdb_destroy_config(config: *mut duckdb_config); } extern "C" { - #[doc = "Executes a SQL query within a connection and stores the full (materialized) result in the out_result pointer."] - #[doc = "If the query fails to execute, DuckDBError is returned and the error message can be retrieved by calling"] - #[doc = "`duckdb_result_error`."] - #[doc = ""] - #[doc = "Note that after running `duckdb_query`, `duckdb_destroy_result` must be called on the result object even if the"] - #[doc = "query fails, otherwise the error stored within the result will not be freed correctly."] - #[doc = ""] - #[doc = " connection: The connection to perform the query in."] - #[doc = " query: The SQL query to run."] - #[doc = " out_result: The query result."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Executes a SQL query within a connection and stores the full (materialized) result in the out_result pointer.\nIf the query fails to execute, DuckDBError is returned and the error message can be retrieved by calling\n`duckdb_result_error`.\n\nNote that after running `duckdb_query`, `duckdb_destroy_result` must be called on the result object even if the\nquery fails, otherwise the error stored within the result will not be freed correctly.\n\n connection: The connection to perform the query in.\n query: The SQL query to run.\n out_result: The query result.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_query( connection: duckdb_connection, query: *const ::std::os::raw::c_char, @@ -11278,138 +11728,51 @@ extern "C" { ) -> duckdb_state; } extern "C" { - #[doc = "Closes the result and de-allocates all memory allocated for that connection."] - #[doc = ""] - #[doc = " result: The result to destroy."] + #[doc = "Closes the result and de-allocates all memory allocated for that connection.\n\n result: The result to destroy."] pub fn duckdb_destroy_result(result: *mut duckdb_result); } extern "C" { - #[doc = "Returns the column name of the specified column. The result should not need be freed; the column names will"] - #[doc = "automatically be destroyed when the result is destroyed."] - #[doc = ""] - #[doc = "Returns `NULL` if the column is out of range."] - #[doc = ""] - #[doc = " result: The result object to fetch the column name from."] - #[doc = " col: The column index."] - #[doc = " returns: The column name of the specified column."] + #[doc = "Returns the column name of the specified column. 
The result should not need be freed; the column names will\nautomatically be destroyed when the result is destroyed.\n\nReturns `NULL` if the column is out of range.\n\n result: The result object to fetch the column name from.\n col: The column index.\n returns: The column name of the specified column."] pub fn duckdb_column_name(result: *mut duckdb_result, col: idx_t) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = "Returns the column type of the specified column."] - #[doc = ""] - #[doc = "Returns `DUCKDB_TYPE_INVALID` if the column is out of range."] - #[doc = ""] - #[doc = " result: The result object to fetch the column type from."] - #[doc = " col: The column index."] - #[doc = " returns: The column type of the specified column."] + #[doc = "Returns the column type of the specified column.\n\nReturns `DUCKDB_TYPE_INVALID` if the column is out of range.\n\n result: The result object to fetch the column type from.\n col: The column index.\n returns: The column type of the specified column."] pub fn duckdb_column_type(result: *mut duckdb_result, col: idx_t) -> duckdb_type; } extern "C" { - #[doc = "Returns the logical column type of the specified column."] - #[doc = ""] - #[doc = "The return type of this call should be destroyed with `duckdb_destroy_logical_type`."] - #[doc = ""] - #[doc = "Returns `NULL` if the column is out of range."] - #[doc = ""] - #[doc = " result: The result object to fetch the column type from."] - #[doc = " col: The column index."] - #[doc = " returns: The logical column type of the specified column."] + #[doc = "Returns the logical column type of the specified column.\n\nThe return type of this call should be destroyed with `duckdb_destroy_logical_type`.\n\nReturns `NULL` if the column is out of range.\n\n result: The result object to fetch the column type from.\n col: The column index.\n returns: The logical column type of the specified column."] pub fn duckdb_column_logical_type(result: *mut duckdb_result, col: idx_t) -> duckdb_logical_type; } extern "C" { - #[doc = "Returns the number of columns present in a the result object."] - #[doc = ""] - #[doc = " result: The result object."] - #[doc = " returns: The number of columns present in the result object."] + #[doc = "Returns the number of columns present in a the result object.\n\n result: The result object.\n returns: The number of columns present in the result object."] pub fn duckdb_column_count(result: *mut duckdb_result) -> idx_t; } extern "C" { - #[doc = "Returns the number of rows present in a the result object."] - #[doc = ""] - #[doc = " result: The result object."] - #[doc = " returns: The number of rows present in the result object."] + #[doc = "Returns the number of rows present in a the result object.\n\n result: The result object.\n returns: The number of rows present in the result object."] pub fn duckdb_row_count(result: *mut duckdb_result) -> idx_t; } extern "C" { - #[doc = "Returns the number of rows changed by the query stored in the result. This is relevant only for INSERT/UPDATE/DELETE"] - #[doc = "queries. For other queries the rows_changed will be 0."] - #[doc = ""] - #[doc = " result: The result object."] - #[doc = " returns: The number of rows changed."] + #[doc = "Returns the number of rows changed by the query stored in the result. This is relevant only for INSERT/UPDATE/DELETE\nqueries. 
For other queries the rows_changed will be 0.\n\n result: The result object.\n returns: The number of rows changed."] pub fn duckdb_rows_changed(result: *mut duckdb_result) -> idx_t; } extern "C" { - #[doc = "DEPRECATED**: Prefer using `duckdb_result_get_chunk` instead."] - #[doc = ""] - #[doc = "Returns the data of a specific column of a result in columnar format."] - #[doc = ""] - #[doc = "The function returns a dense array which contains the result data. The exact type stored in the array depends on the"] - #[doc = "corresponding duckdb_type (as provided by `duckdb_column_type`). For the exact type by which the data should be"] - #[doc = "accessed, see the comments in [the types section](types) or the `DUCKDB_TYPE` enum."] - #[doc = ""] - #[doc = "For example, for a column of type `DUCKDB_TYPE_INTEGER`, rows can be accessed in the following manner:"] - #[doc = "```c"] - #[doc = "int32_t *data = (int32_t *) duckdb_column_data(&result, 0);"] - #[doc = "printf(\"Data for row %d: %d\\n\", row, data[row]);"] - #[doc = "```"] - #[doc = ""] - #[doc = " result: The result object to fetch the column data from."] - #[doc = " col: The column index."] - #[doc = " returns: The column data of the specified column."] + #[doc = "DEPRECATED**: Prefer using `duckdb_result_get_chunk` instead.\n\nReturns the data of a specific column of a result in columnar format.\n\nThe function returns a dense array which contains the result data. The exact type stored in the array depends on the\ncorresponding duckdb_type (as provided by `duckdb_column_type`). For the exact type by which the data should be\naccessed, see the comments in [the types section](types) or the `DUCKDB_TYPE` enum.\n\nFor example, for a column of type `DUCKDB_TYPE_INTEGER`, rows can be accessed in the following manner:\n```c\nint32_t *data = (int32_t *) duckdb_column_data(&result, 0);\nprintf(\"Data for row %d: %d\\n\", row, data[row]);\n```\n\n result: The result object to fetch the column data from.\n col: The column index.\n returns: The column data of the specified column."] pub fn duckdb_column_data(result: *mut duckdb_result, col: idx_t) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "DEPRECATED**: Prefer using `duckdb_result_get_chunk` instead."] - #[doc = ""] - #[doc = "Returns the nullmask of a specific column of a result in columnar format. The nullmask indicates for every row"] - #[doc = "whether or not the corresponding row is `NULL`. If a row is `NULL`, the values present in the array provided"] - #[doc = "by `duckdb_column_data` are undefined."] - #[doc = ""] - #[doc = "```c"] - #[doc = "int32_t *data = (int32_t *) duckdb_column_data(&result, 0);"] - #[doc = "bool *nullmask = duckdb_nullmask_data(&result, 0);"] - #[doc = "if (nullmask[row]) {"] - #[doc = "printf(\"Data for row %d: NULL\\n\", row);"] - #[doc = "} else {"] - #[doc = "printf(\"Data for row %d: %d\\n\", row, data[row]);"] - #[doc = "}"] - #[doc = "```"] - #[doc = ""] - #[doc = " result: The result object to fetch the nullmask from."] - #[doc = " col: The column index."] - #[doc = " returns: The nullmask of the specified column."] + #[doc = "DEPRECATED**: Prefer using `duckdb_result_get_chunk` instead.\n\nReturns the nullmask of a specific column of a result in columnar format. The nullmask indicates for every row\nwhether or not the corresponding row is `NULL`. 
If a row is `NULL`, the values present in the array provided\nby `duckdb_column_data` are undefined.\n\n```c\nint32_t *data = (int32_t *) duckdb_column_data(&result, 0);\nbool *nullmask = duckdb_nullmask_data(&result, 0);\nif (nullmask[row]) {\nprintf(\"Data for row %d: NULL\\n\", row);\n} else {\nprintf(\"Data for row %d: %d\\n\", row, data[row]);\n}\n```\n\n result: The result object to fetch the nullmask from.\n col: The column index.\n returns: The nullmask of the specified column."] pub fn duckdb_nullmask_data(result: *mut duckdb_result, col: idx_t) -> *mut bool; } extern "C" { - #[doc = "Returns the error message contained within the result. The error is only set if `duckdb_query` returns `DuckDBError`."] - #[doc = ""] - #[doc = "The result of this function must not be freed. It will be cleaned up when `duckdb_destroy_result` is called."] - #[doc = ""] - #[doc = " result: The result object to fetch the error from."] - #[doc = " returns: The error of the result."] + #[doc = "Returns the error message contained within the result. The error is only set if `duckdb_query` returns `DuckDBError`.\n\nThe result of this function must not be freed. It will be cleaned up when `duckdb_destroy_result` is called.\n\n result: The result object to fetch the error from.\n returns: The error of the result."] pub fn duckdb_result_error(result: *mut duckdb_result) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = "Fetches a data chunk from the duckdb_result. This function should be called repeatedly until the result is exhausted."] - #[doc = ""] - #[doc = "This function supersedes all `duckdb_value` functions, as well as the `duckdb_column_data` and `duckdb_nullmask_data`"] - #[doc = "functions. It results in significantly better performance, and should be preferred in newer code-bases."] - #[doc = ""] - #[doc = "If this function is used, none of the other result functions can be used and vice versa (i.e. this function cannot be"] - #[doc = "mixed with the legacy result functions)."] - #[doc = ""] - #[doc = "Use `duckdb_result_chunk_count` to figure out how many chunks there are in the result."] - #[doc = ""] - #[doc = " result: The result object to fetch the data chunk from."] - #[doc = " chunk_index: The chunk index to fetch from."] - #[doc = " returns: The resulting data chunk. Returns `NULL` if the chunk index is out of bounds."] + #[doc = "Fetches a data chunk from the duckdb_result. This function should be called repeatedly until the result is exhausted.\n\nThe result must be destroyed with `duckdb_destroy_data_chunk`.\n\nThis function supersedes all `duckdb_value` functions, as well as the `duckdb_column_data` and `duckdb_nullmask_data`\nfunctions. It results in significantly better performance, and should be preferred in newer code-bases.\n\nIf this function is used, none of the other result functions can be used and vice versa (i.e. this function cannot be\nmixed with the legacy result functions).\n\nUse `duckdb_result_chunk_count` to figure out how many chunks there are in the result.\n\n result: The result object to fetch the data chunk from.\n chunk_index: The chunk index to fetch from.\n returns: The resulting data chunk. Returns `NULL` if the chunk index is out of bounds."] pub fn duckdb_result_get_chunk(result: duckdb_result, chunk_index: idx_t) -> duckdb_data_chunk; } extern "C" { - #[doc = "Returns the number of data chunks present in the result."] - #[doc = ""] - #[doc = " result: The result object"] - #[doc = " returns: The resulting data chunk. 
Returns `NULL` if the chunk index is out of bounds."] + #[doc = "Returns the number of data chunks present in the result.\n\n result: The result object\n returns: The resulting data chunk. Returns `NULL` if the chunk index is out of bounds."] pub fn duckdb_result_chunk_count(result: duckdb_result) -> idx_t; } extern "C" { @@ -11481,24 +11844,15 @@ extern "C" { pub fn duckdb_value_interval(result: *mut duckdb_result, col: idx_t, row: idx_t) -> duckdb_interval; } extern "C" { - #[doc = " DEPRECATED: use duckdb_value_string instead. This function does not work correctly if the string contains null bytes."] - #[doc = " returns: The text value at the specified location as a null-terminated string, or nullptr if the value cannot be"] - #[doc = "converted. The result must be freed with `duckdb_free`."] + #[doc = " DEPRECATED: use duckdb_value_string instead. This function does not work correctly if the string contains null bytes.\n returns: The text value at the specified location as a null-terminated string, or nullptr if the value cannot be\nconverted. The result must be freed with `duckdb_free`."] pub fn duckdb_value_varchar(result: *mut duckdb_result, col: idx_t, row: idx_t) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = "s"] - #[doc = " returns: The string value at the specified location."] - #[doc = "The result must be freed with `duckdb_free`."] + #[doc = "s\n returns: The string value at the specified location.\nThe result must be freed with `duckdb_free`."] pub fn duckdb_value_string(result: *mut duckdb_result, col: idx_t, row: idx_t) -> duckdb_string; } extern "C" { - #[doc = " DEPRECATED: use duckdb_value_string_internal instead. This function does not work correctly if the string contains"] - #[doc = "null bytes."] - #[doc = " returns: The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast."] - #[doc = "If the column is NOT a VARCHAR column this function will return NULL."] - #[doc = ""] - #[doc = "The result must NOT be freed."] + #[doc = " DEPRECATED: use duckdb_value_string_internal instead. This function does not work correctly if the string contains\nnull bytes.\n returns: The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast.\nIf the column is NOT a VARCHAR column this function will return NULL.\n\nThe result must NOT be freed."] pub fn duckdb_value_varchar_internal( result: *mut duckdb_result, col: idx_t, @@ -11506,17 +11860,11 @@ extern "C" { ) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = " DEPRECATED: use duckdb_value_string_internal instead. This function does not work correctly if the string contains"] - #[doc = "null bytes."] - #[doc = " returns: The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast."] - #[doc = "If the column is NOT a VARCHAR column this function will return NULL."] - #[doc = ""] - #[doc = "The result must NOT be freed."] + #[doc = " DEPRECATED: use duckdb_value_string_internal instead. This function does not work correctly if the string contains\nnull bytes.\n returns: The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast.\nIf the column is NOT a VARCHAR column this function will return NULL.\n\nThe result must NOT be freed."] pub fn duckdb_value_string_internal(result: *mut duckdb_result, col: idx_t, row: idx_t) -> duckdb_string; } extern "C" { - #[doc = " returns: The duckdb_blob value at the specified location. 
Returns a blob with blob.data set to nullptr if the"] - #[doc = "value cannot be converted. The resulting \"blob.data\" must be freed with `duckdb_free.`"] + #[doc = " returns: The duckdb_blob value at the specified location. Returns a blob with blob.data set to nullptr if the\nvalue cannot be converted. The resulting \"blob.data\" must be freed with `duckdb_free.`"] pub fn duckdb_value_blob(result: *mut duckdb_result, col: idx_t, row: idx_t) -> duckdb_blob; } extern "C" { @@ -11524,112 +11872,59 @@ extern "C" { pub fn duckdb_value_is_null(result: *mut duckdb_result, col: idx_t, row: idx_t) -> bool; } extern "C" { - #[doc = "Allocate `size` bytes of memory using the duckdb internal malloc function. Any memory allocated in this manner"] - #[doc = "should be freed using `duckdb_free`."] - #[doc = ""] - #[doc = " size: The number of bytes to allocate."] - #[doc = " returns: A pointer to the allocated memory region."] + #[doc = "Allocate `size` bytes of memory using the duckdb internal malloc function. Any memory allocated in this manner\nshould be freed using `duckdb_free`.\n\n size: The number of bytes to allocate.\n returns: A pointer to the allocated memory region."] pub fn duckdb_malloc(size: usize) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Free a value returned from `duckdb_malloc`, `duckdb_value_varchar` or `duckdb_value_blob`."] - #[doc = ""] - #[doc = " ptr: The memory region to de-allocate."] + #[doc = "Free a value returned from `duckdb_malloc`, `duckdb_value_varchar` or `duckdb_value_blob`.\n\n ptr: The memory region to de-allocate."] pub fn duckdb_free(ptr: *mut ::std::os::raw::c_void); } extern "C" { - #[doc = "The internal vector size used by DuckDB."] - #[doc = "This is the amount of tuples that will fit into a data chunk created by `duckdb_create_data_chunk`."] - #[doc = ""] - #[doc = " returns: The vector size."] + #[doc = "The internal vector size used by DuckDB.\nThis is the amount of tuples that will fit into a data chunk created by `duckdb_create_data_chunk`.\n\n returns: The vector size."] pub fn duckdb_vector_size() -> idx_t; } extern "C" { - #[doc = "Decompose a `duckdb_date` object into year, month and date (stored as `duckdb_date_struct`)."] - #[doc = ""] - #[doc = " date: The date object, as obtained from a `DUCKDB_TYPE_DATE` column."] - #[doc = " returns: The `duckdb_date_struct` with the decomposed elements."] + #[doc = "Decompose a `duckdb_date` object into year, month and date (stored as `duckdb_date_struct`).\n\n date: The date object, as obtained from a `DUCKDB_TYPE_DATE` column.\n returns: The `duckdb_date_struct` with the decomposed elements."] pub fn duckdb_from_date(date: duckdb_date) -> duckdb_date_struct; } extern "C" { - #[doc = "Re-compose a `duckdb_date` from year, month and date (`duckdb_date_struct`)."] - #[doc = ""] - #[doc = " date: The year, month and date stored in a `duckdb_date_struct`."] - #[doc = " returns: The `duckdb_date` element."] + #[doc = "Re-compose a `duckdb_date` from year, month and date (`duckdb_date_struct`).\n\n date: The year, month and date stored in a `duckdb_date_struct`.\n returns: The `duckdb_date` element."] pub fn duckdb_to_date(date: duckdb_date_struct) -> duckdb_date; } extern "C" { - #[doc = "Decompose a `duckdb_time` object into hour, minute, second and microsecond (stored as `duckdb_time_struct`)."] - #[doc = ""] - #[doc = " time: The time object, as obtained from a `DUCKDB_TYPE_TIME` column."] - #[doc = " returns: The `duckdb_time_struct` with the decomposed elements."] + #[doc = "Decompose a 
`duckdb_time` object into hour, minute, second and microsecond (stored as `duckdb_time_struct`).\n\n time: The time object, as obtained from a `DUCKDB_TYPE_TIME` column.\n returns: The `duckdb_time_struct` with the decomposed elements."] pub fn duckdb_from_time(time: duckdb_time) -> duckdb_time_struct; } extern "C" { - #[doc = "Re-compose a `duckdb_time` from hour, minute, second and microsecond (`duckdb_time_struct`)."] - #[doc = ""] - #[doc = " time: The hour, minute, second and microsecond in a `duckdb_time_struct`."] - #[doc = " returns: The `duckdb_time` element."] + #[doc = "Re-compose a `duckdb_time` from hour, minute, second and microsecond (`duckdb_time_struct`).\n\n time: The hour, minute, second and microsecond in a `duckdb_time_struct`.\n returns: The `duckdb_time` element."] pub fn duckdb_to_time(time: duckdb_time_struct) -> duckdb_time; } extern "C" { - #[doc = "Decompose a `duckdb_timestamp` object into a `duckdb_timestamp_struct`."] - #[doc = ""] - #[doc = " ts: The ts object, as obtained from a `DUCKDB_TYPE_TIMESTAMP` column."] - #[doc = " returns: The `duckdb_timestamp_struct` with the decomposed elements."] + #[doc = "Decompose a `duckdb_timestamp` object into a `duckdb_timestamp_struct`.\n\n ts: The ts object, as obtained from a `DUCKDB_TYPE_TIMESTAMP` column.\n returns: The `duckdb_timestamp_struct` with the decomposed elements."] pub fn duckdb_from_timestamp(ts: duckdb_timestamp) -> duckdb_timestamp_struct; } extern "C" { - #[doc = "Re-compose a `duckdb_timestamp` from a duckdb_timestamp_struct."] - #[doc = ""] - #[doc = " ts: The de-composed elements in a `duckdb_timestamp_struct`."] - #[doc = " returns: The `duckdb_timestamp` element."] + #[doc = "Re-compose a `duckdb_timestamp` from a duckdb_timestamp_struct.\n\n ts: The de-composed elements in a `duckdb_timestamp_struct`.\n returns: The `duckdb_timestamp` element."] pub fn duckdb_to_timestamp(ts: duckdb_timestamp_struct) -> duckdb_timestamp; } extern "C" { - #[doc = "Converts a duckdb_hugeint object (as obtained from a `DUCKDB_TYPE_HUGEINT` column) into a double."] - #[doc = ""] - #[doc = " val: The hugeint value."] - #[doc = " returns: The converted `double` element."] + #[doc = "Converts a duckdb_hugeint object (as obtained from a `DUCKDB_TYPE_HUGEINT` column) into a double.\n\n val: The hugeint value.\n returns: The converted `double` element."] pub fn duckdb_hugeint_to_double(val: duckdb_hugeint) -> f64; } extern "C" { - #[doc = "Converts a double value to a duckdb_hugeint object."] - #[doc = ""] - #[doc = "If the conversion fails because the double value is too big the result will be 0."] - #[doc = ""] - #[doc = " val: The double value."] - #[doc = " returns: The converted `duckdb_hugeint` element."] + #[doc = "Converts a double value to a duckdb_hugeint object.\n\nIf the conversion fails because the double value is too big the result will be 0.\n\n val: The double value.\n returns: The converted `duckdb_hugeint` element."] pub fn duckdb_double_to_hugeint(val: f64) -> duckdb_hugeint; } extern "C" { - #[doc = "Converts a double value to a duckdb_decimal object."] - #[doc = ""] - #[doc = "If the conversion fails because the double value is too big, or the width/scale are invalid the result will be 0."] - #[doc = ""] - #[doc = " val: The double value."] - #[doc = " returns: The converted `duckdb_decimal` element."] + #[doc = "Converts a double value to a duckdb_decimal object.\n\nIf the conversion fails because the double value is too big, or the width/scale are invalid the result will be 0.\n\n val: The double 
value.\n returns: The converted `duckdb_decimal` element."] pub fn duckdb_double_to_decimal(val: f64, width: u8, scale: u8) -> duckdb_decimal; } extern "C" { - #[doc = "Converts a duckdb_decimal object (as obtained from a `DUCKDB_TYPE_DECIMAL` column) into a double."] - #[doc = ""] - #[doc = " val: The decimal value."] - #[doc = " returns: The converted `double` element."] + #[doc = "Converts a duckdb_decimal object (as obtained from a `DUCKDB_TYPE_DECIMAL` column) into a double.\n\n val: The decimal value.\n returns: The converted `double` element."] pub fn duckdb_decimal_to_double(val: duckdb_decimal) -> f64; } extern "C" { - #[doc = "Create a prepared statement object from a query."] - #[doc = ""] - #[doc = "Note that after calling `duckdb_prepare`, the prepared statement should always be destroyed using"] - #[doc = "`duckdb_destroy_prepare`, even if the prepare fails."] - #[doc = ""] - #[doc = "If the prepare fails, `duckdb_prepare_error` can be called to obtain the reason why the prepare failed."] - #[doc = ""] - #[doc = " connection: The connection object"] - #[doc = " query: The SQL query to prepare"] - #[doc = " out_prepared_statement: The resulting prepared statement object"] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Create a prepared statement object from a query.\n\nNote that after calling `duckdb_prepare`, the prepared statement should always be destroyed using\n`duckdb_destroy_prepare`, even if the prepare fails.\n\nIf the prepare fails, `duckdb_prepare_error` can be called to obtain the reason why the prepare failed.\n\n connection: The connection object\n query: The SQL query to prepare\n out_prepared_statement: The resulting prepared statement object\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_prepare( connection: duckdb_connection, query: *const ::std::os::raw::c_char, @@ -11637,37 +11932,19 @@ extern "C" { ) -> duckdb_state; } extern "C" { - #[doc = "Closes the prepared statement and de-allocates all memory allocated for the statement."] - #[doc = ""] - #[doc = " prepared_statement: The prepared statement to destroy."] + #[doc = "Closes the prepared statement and de-allocates all memory allocated for the statement.\n\n prepared_statement: The prepared statement to destroy."] pub fn duckdb_destroy_prepare(prepared_statement: *mut duckdb_prepared_statement); } extern "C" { - #[doc = "Returns the error message associated with the given prepared statement."] - #[doc = "If the prepared statement has no error message, this returns `nullptr` instead."] - #[doc = ""] - #[doc = "The error message should not be freed. It will be de-allocated when `duckdb_destroy_prepare` is called."] - #[doc = ""] - #[doc = " prepared_statement: The prepared statement to obtain the error from."] - #[doc = " returns: The error message, or `nullptr` if there is none."] + #[doc = "Returns the error message associated with the given prepared statement.\nIf the prepared statement has no error message, this returns `nullptr` instead.\n\nThe error message should not be freed. 
It will be de-allocated when `duckdb_destroy_prepare` is called.\n\n prepared_statement: The prepared statement to obtain the error from.\n returns: The error message, or `nullptr` if there is none."] pub fn duckdb_prepare_error(prepared_statement: duckdb_prepared_statement) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = "Returns the number of parameters that can be provided to the given prepared statement."] - #[doc = ""] - #[doc = "Returns 0 if the query was not successfully prepared."] - #[doc = ""] - #[doc = " prepared_statement: The prepared statement to obtain the number of parameters for."] + #[doc = "Returns the number of parameters that can be provided to the given prepared statement.\n\nReturns 0 if the query was not successfully prepared.\n\n prepared_statement: The prepared statement to obtain the number of parameters for."] pub fn duckdb_nparams(prepared_statement: duckdb_prepared_statement) -> idx_t; } extern "C" { - #[doc = "Returns the parameter type for the parameter at the given index."] - #[doc = ""] - #[doc = "Returns `DUCKDB_TYPE_INVALID` if the parameter index is out of range or the statement was not successfully prepared."] - #[doc = ""] - #[doc = " prepared_statement: The prepared statement."] - #[doc = " param_idx: The parameter index."] - #[doc = " returns: The parameter type"] + #[doc = "Returns the parameter type for the parameter at the given index.\n\nReturns `DUCKDB_TYPE_INVALID` if the parameter index is out of range or the statement was not successfully prepared.\n\n prepared_statement: The prepared statement.\n param_idx: The parameter index.\n returns: The parameter type"] pub fn duckdb_param_type(prepared_statement: duckdb_prepared_statement, param_idx: idx_t) -> duckdb_type; } extern "C" { @@ -11821,387 +12098,190 @@ extern "C" { pub fn duckdb_bind_null(prepared_statement: duckdb_prepared_statement, param_idx: idx_t) -> duckdb_state; } extern "C" { - #[doc = "Executes the prepared statement with the given bound parameters, and returns a materialized query result."] - #[doc = ""] - #[doc = "This method can be called multiple times for each prepared statement, and the parameters can be modified"] - #[doc = "between calls to this function."] - #[doc = ""] - #[doc = " prepared_statement: The prepared statement to execute."] - #[doc = " out_result: The query result."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Executes the prepared statement with the given bound parameters, and returns a materialized query result.\n\nThis method can be called multiple times for each prepared statement, and the parameters can be modified\nbetween calls to this function.\n\n prepared_statement: The prepared statement to execute.\n out_result: The query result.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_execute_prepared( prepared_statement: duckdb_prepared_statement, out_result: *mut duckdb_result, ) -> duckdb_state; } extern "C" { - #[doc = "Executes the prepared statement with the given bound parameters, and returns an arrow query result."] - #[doc = ""] - #[doc = " prepared_statement: The prepared statement to execute."] - #[doc = " out_result: The query result."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Executes the prepared statement with the given bound parameters, and returns an arrow query result.\n\n prepared_statement: The prepared statement to execute.\n out_result: The query result.\n returns: `DuckDBSuccess` on success 
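Taken together, `duckdb_prepare`, `duckdb_prepare_error`, `duckdb_nparams` and `duckdb_destroy_prepare` give the usual prepare/inspect/destroy lifecycle. A minimal sketch; `duckdb_open`, `duckdb_connect`, `duckdb_disconnect`, `duckdb_close` and `duckdb_destroy_result` belong to the same C API but fall outside this hunk, and the `duckdb_state_DuckDBSuccess` constant name assumes bindgen's default constified-enum naming:

    use libduckdb_sys::*;
    use std::ffi::{CStr, CString};
    use std::ptr;

    unsafe fn prepared_roundtrip() {
        let mut db: duckdb_database = ptr::null_mut();
        let mut con: duckdb_connection = ptr::null_mut();
        // In-memory database; a null path means ":memory:". Return codes
        // are ignored here for brevity.
        duckdb_open(ptr::null(), &mut db);
        duckdb_connect(db, &mut con);

        let sql = CString::new("SELECT ?::BIGINT IS NULL").unwrap();
        let mut stmt: duckdb_prepared_statement = ptr::null_mut();
        if duckdb_prepare(con, sql.as_ptr(), &mut stmt) != duckdb_state_DuckDBSuccess {
            // The statement must still be destroyed after a failed prepare.
            let msg = CStr::from_ptr(duckdb_prepare_error(stmt));
            eprintln!("prepare failed: {}", msg.to_string_lossy());
            duckdb_destroy_prepare(&mut stmt);
            return;
        }
        assert_eq!(duckdb_nparams(stmt), 1); // one `?` placeholder

        duckdb_bind_null(stmt, 1); // parameter indices are 1-based
        let mut result: duckdb_result = std::mem::zeroed();
        duckdb_execute_prepared(stmt, &mut result);

        duckdb_destroy_result(&mut result);
        duckdb_destroy_prepare(&mut stmt);
        duckdb_disconnect(&mut con);
        duckdb_close(&mut db);
    }
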
or `DuckDBError` on failure."] pub fn duckdb_execute_prepared_arrow( prepared_statement: duckdb_prepared_statement, out_result: *mut duckdb_arrow, ) -> duckdb_state; } extern "C" { - #[doc = "Executes the prepared statement with the given bound parameters, and returns a pending result."] - #[doc = "The pending result represents an intermediate structure for a query that is not yet fully executed."] - #[doc = "The pending result can be used to incrementally execute a query, returning control to the client between tasks."] - #[doc = ""] - #[doc = "Note that after calling `duckdb_pending_prepared`, the pending result should always be destroyed using"] - #[doc = "`duckdb_destroy_pending`, even if this function returns DuckDBError."] - #[doc = ""] - #[doc = " prepared_statement: The prepared statement to execute."] - #[doc = " out_result: The pending query result."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Executes the prepared statement with the given bound parameters, and returns a pending result.\nThe pending result represents an intermediate structure for a query that is not yet fully executed.\nThe pending result can be used to incrementally execute a query, returning control to the client between tasks.\n\nNote that after calling `duckdb_pending_prepared`, the pending result should always be destroyed using\n`duckdb_destroy_pending`, even if this function returns DuckDBError.\n\n prepared_statement: The prepared statement to execute.\n out_result: The pending query result.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_pending_prepared( prepared_statement: duckdb_prepared_statement, out_result: *mut duckdb_pending_result, ) -> duckdb_state; } extern "C" { - #[doc = "Closes the pending result and de-allocates all memory allocated for the result."] - #[doc = ""] - #[doc = " pending_result: The pending result to destroy."] + #[doc = "Closes the pending result and de-allocates all memory allocated for the result.\n\n pending_result: The pending result to destroy."] pub fn duckdb_destroy_pending(pending_result: *mut duckdb_pending_result); } extern "C" { - #[doc = "Returns the error message contained within the pending result."] - #[doc = ""] - #[doc = "The result of this function must not be freed. It will be cleaned up when `duckdb_destroy_pending` is called."] - #[doc = ""] - #[doc = " result: The pending result to fetch the error from."] - #[doc = " returns: The error of the pending result."] + #[doc = "Returns the error message contained within the pending result.\n\nThe result of this function must not be freed. 
It will be cleaned up when `duckdb_destroy_pending` is called.\n\n result: The pending result to fetch the error from.\n returns: The error of the pending result."] pub fn duckdb_pending_error(pending_result: duckdb_pending_result) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = "Executes a single task within the query, returning whether or not the query is ready."] - #[doc = ""] - #[doc = "If this returns DUCKDB_PENDING_RESULT_READY, the duckdb_execute_pending function can be called to obtain the result."] - #[doc = "If this returns DUCKDB_PENDING_RESULT_NOT_READY, the duckdb_pending_execute_task function should be called again."] - #[doc = "If this returns DUCKDB_PENDING_ERROR, an error occurred during execution."] - #[doc = ""] - #[doc = "The error message can be obtained by calling duckdb_pending_error on the pending_result."] - #[doc = ""] - #[doc = " pending_result: The pending result to execute a task within.."] - #[doc = " returns: The state of the pending result after the execution."] + #[doc = "Executes a single task within the query, returning whether or not the query is ready.\n\nIf this returns DUCKDB_PENDING_RESULT_READY, the duckdb_execute_pending function can be called to obtain the result.\nIf this returns DUCKDB_PENDING_RESULT_NOT_READY, the duckdb_pending_execute_task function should be called again.\nIf this returns DUCKDB_PENDING_ERROR, an error occurred during execution.\n\nThe error message can be obtained by calling duckdb_pending_error on the pending_result.\n\n pending_result: The pending result to execute a task within..\n returns: The state of the pending result after the execution."] pub fn duckdb_pending_execute_task(pending_result: duckdb_pending_result) -> duckdb_pending_state; } extern "C" { - #[doc = "Fully execute a pending query result, returning the final query result."] - #[doc = ""] - #[doc = "If duckdb_pending_execute_task has been called until DUCKDB_PENDING_RESULT_READY was returned, this will return fast."] - #[doc = "Otherwise, all remaining tasks must be executed first."] - #[doc = ""] - #[doc = " pending_result: The pending result to execute."] - #[doc = " out_result: The result object."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Fully execute a pending query result, returning the final query result.\n\nIf duckdb_pending_execute_task has been called until DUCKDB_PENDING_RESULT_READY was returned, this will return fast.\nOtherwise, all remaining tasks must be executed first.\n\n pending_result: The pending result to execute.\n out_result: The result object.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_execute_pending( pending_result: duckdb_pending_result, out_result: *mut duckdb_result, ) -> duckdb_state; } extern "C" { - #[doc = "Destroys the value and de-allocates all memory allocated for that type."] - #[doc = ""] - #[doc = " value: The value to destroy."] + #[doc = "Destroys the value and de-allocates all memory allocated for that type.\n\n value: The value to destroy."] pub fn duckdb_destroy_value(value: *mut duckdb_value); } extern "C" { - #[doc = "Creates a value from a null-terminated string"] - #[doc = ""] - #[doc = " value: The null-terminated string"] - #[doc = " returns: The value. This must be destroyed with `duckdb_destroy_value`."] + #[doc = "Creates a value from a null-terminated string\n\n value: The null-terminated string\n returns: The value. 
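The pending-result functions above are designed to be driven in a loop: execute one task at a time and hand control back to the client between tasks. A sketch of that loop, assuming a prepared statement `stmt` as in the earlier sketch; the `duckdb_pending_state_*` constant names again assume bindgen's default constified-enum naming, and `duckdb_destroy_result` is outside this hunk:

    use libduckdb_sys::*;
    use std::ffi::CStr;
    use std::ptr;

    unsafe fn run_incrementally(stmt: duckdb_prepared_statement) {
        let mut pending: duckdb_pending_result = ptr::null_mut();
        if duckdb_pending_prepared(stmt, &mut pending) != duckdb_state_DuckDBSuccess {
            // Must be destroyed even when creation reports DuckDBError.
            duckdb_destroy_pending(&mut pending);
            return;
        }
        loop {
            let state = duckdb_pending_execute_task(pending);
            if state == duckdb_pending_state_DUCKDB_PENDING_RESULT_READY {
                break;
            }
            if state == duckdb_pending_state_DUCKDB_PENDING_ERROR {
                let msg = CStr::from_ptr(duckdb_pending_error(pending));
                eprintln!("pending query failed: {}", msg.to_string_lossy());
                duckdb_destroy_pending(&mut pending);
                return;
            }
            // DUCKDB_PENDING_RESULT_NOT_READY: a natural point to yield to
            // other work before executing the next task.
        }
        // All tasks done, so this returns quickly.
        let mut result: duckdb_result = std::mem::zeroed();
        duckdb_execute_pending(pending, &mut result);
        duckdb_destroy_result(&mut result);
        duckdb_destroy_pending(&mut pending);
    }
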
This must be destroyed with `duckdb_destroy_value`."] pub fn duckdb_create_varchar(text: *const ::std::os::raw::c_char) -> duckdb_value; } extern "C" { - #[doc = "Creates a value from a string"] - #[doc = ""] - #[doc = " value: The text"] - #[doc = " length: The length of the text"] - #[doc = " returns: The value. This must be destroyed with `duckdb_destroy_value`."] + #[doc = "Creates a value from a string\n\n value: The text\n length: The length of the text\n returns: The value. This must be destroyed with `duckdb_destroy_value`."] pub fn duckdb_create_varchar_length(text: *const ::std::os::raw::c_char, length: idx_t) -> duckdb_value; } extern "C" { - #[doc = "Creates a value from an int64"] - #[doc = ""] - #[doc = " value: The bigint value"] - #[doc = " returns: The value. This must be destroyed with `duckdb_destroy_value`."] + #[doc = "Creates a value from an int64\n\n value: The bigint value\n returns: The value. This must be destroyed with `duckdb_destroy_value`."] pub fn duckdb_create_int64(val: i64) -> duckdb_value; } extern "C" { - #[doc = "Obtains a string representation of the given value."] - #[doc = "The result must be destroyed with `duckdb_free`."] - #[doc = ""] - #[doc = " value: The value"] - #[doc = " returns: The string value. This must be destroyed with `duckdb_free`."] + #[doc = "Obtains a string representation of the given value.\nThe result must be destroyed with `duckdb_free`.\n\n value: The value\n returns: The string value. This must be destroyed with `duckdb_free`."] pub fn duckdb_get_varchar(value: duckdb_value) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = "Obtains an int64 of the given value."] - #[doc = ""] - #[doc = " value: The value"] - #[doc = " returns: The int64 value, or 0 if no conversion is possible"] + #[doc = "Obtains an int64 of the given value.\n\n value: The value\n returns: The int64 value, or 0 if no conversion is possible"] pub fn duckdb_get_int64(value: duckdb_value) -> i64; } extern "C" { - #[doc = "Creates a `duckdb_logical_type` from a standard primitive type."] - #[doc = "The resulting type should be destroyed with `duckdb_destroy_logical_type`."] - #[doc = ""] - #[doc = "This should not be used with `DUCKDB_TYPE_DECIMAL`."] - #[doc = ""] - #[doc = " type: The primitive type to create."] - #[doc = " returns: The logical type."] + #[doc = "Creates a `duckdb_logical_type` from a standard primitive type.\nThe resulting type should be destroyed with `duckdb_destroy_logical_type`.\n\nThis should not be used with `DUCKDB_TYPE_DECIMAL`.\n\n type: The primitive type to create.\n returns: The logical type."] pub fn duckdb_create_logical_type(type_: duckdb_type) -> duckdb_logical_type; } extern "C" { - #[doc = "Creates a list type from its child type."] - #[doc = "The resulting type should be destroyed with `duckdb_destroy_logical_type`."] - #[doc = ""] - #[doc = " type: The child type of list type to create."] - #[doc = " returns: The logical type."] + #[doc = "Creates a list type from its child type.\nThe resulting type should be destroyed with `duckdb_destroy_logical_type`.\n\n type: The child type of list type to create.\n returns: The logical type."] pub fn duckdb_create_list_type(type_: duckdb_logical_type) -> duckdb_logical_type; } extern "C" { - #[doc = "Creates a map type from its key type and value type."] - #[doc = "The resulting type should be destroyed with `duckdb_destroy_logical_type`."] - #[doc = ""] - #[doc = " type: The key type and value type of map type to create."] - #[doc = " returns: The logical type."] + #[doc = 
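Each `duckdb_value` constructor above pairs with a mandatory `duckdb_destroy_value`, and `duckdb_get_varchar` hands back memory that must go through `duckdb_free`. A small self-contained sketch of that ownership discipline:

    use libduckdb_sys::*;
    use std::ffi::CStr;

    unsafe fn value_roundtrip() {
        let mut v: duckdb_value = duckdb_create_int64(42);
        // String representation; we own it, so release it with duckdb_free.
        let s = duckdb_get_varchar(v);
        assert_eq!(CStr::from_ptr(s).to_str().unwrap(), "42");
        duckdb_free(s as *mut std::os::raw::c_void);
        assert_eq!(duckdb_get_int64(v), 42);
        duckdb_destroy_value(&mut v);
    }
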
"Creates a map type from its key type and value type.\nThe resulting type should be destroyed with `duckdb_destroy_logical_type`.\n\n type: The key type and value type of map type to create.\n returns: The logical type."] pub fn duckdb_create_map_type( key_type: duckdb_logical_type, value_type: duckdb_logical_type, ) -> duckdb_logical_type; } extern "C" { - #[doc = "Creates a `duckdb_logical_type` of type decimal with the specified width and scale"] - #[doc = "The resulting type should be destroyed with `duckdb_destroy_logical_type`."] - #[doc = ""] - #[doc = " width: The width of the decimal type"] - #[doc = " scale: The scale of the decimal type"] - #[doc = " returns: The logical type."] + #[doc = "Creates a `duckdb_logical_type` of type decimal with the specified width and scale\nThe resulting type should be destroyed with `duckdb_destroy_logical_type`.\n\n width: The width of the decimal type\n scale: The scale of the decimal type\n returns: The logical type."] pub fn duckdb_create_decimal_type(width: u8, scale: u8) -> duckdb_logical_type; } extern "C" { - #[doc = "Retrieves the type class of a `duckdb_logical_type`."] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The type id"] + #[doc = "Retrieves the type class of a `duckdb_logical_type`.\n\n type: The logical type object\n returns: The type id"] pub fn duckdb_get_type_id(type_: duckdb_logical_type) -> duckdb_type; } extern "C" { - #[doc = "Retrieves the width of a decimal type."] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The width of the decimal type"] + #[doc = "Retrieves the width of a decimal type.\n\n type: The logical type object\n returns: The width of the decimal type"] pub fn duckdb_decimal_width(type_: duckdb_logical_type) -> u8; } extern "C" { - #[doc = "Retrieves the scale of a decimal type."] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The scale of the decimal type"] + #[doc = "Retrieves the scale of a decimal type.\n\n type: The logical type object\n returns: The scale of the decimal type"] pub fn duckdb_decimal_scale(type_: duckdb_logical_type) -> u8; } extern "C" { - #[doc = "Retrieves the internal storage type of a decimal type."] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The internal type of the decimal type"] + #[doc = "Retrieves the internal storage type of a decimal type.\n\n type: The logical type object\n returns: The internal type of the decimal type"] pub fn duckdb_decimal_internal_type(type_: duckdb_logical_type) -> duckdb_type; } extern "C" { - #[doc = "Retrieves the internal storage type of an enum type."] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The internal type of the enum type"] + #[doc = "Retrieves the internal storage type of an enum type.\n\n type: The logical type object\n returns: The internal type of the enum type"] pub fn duckdb_enum_internal_type(type_: duckdb_logical_type) -> duckdb_type; } extern "C" { - #[doc = "Retrieves the dictionary size of the enum type"] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The dictionary size of the enum type"] + #[doc = "Retrieves the dictionary size of the enum type\n\n type: The logical type object\n returns: The dictionary size of the enum type"] pub fn duckdb_enum_dictionary_size(type_: duckdb_logical_type) -> u32; } extern "C" { - #[doc = "Retrieves the dictionary value at the specified position from the enum."] - #[doc = ""] - #[doc = "The 
result must be freed with `duckdb_free`"] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " index: The index in the dictionary"] - #[doc = " returns: The string value of the enum type. Must be freed with `duckdb_free`."] + #[doc = "Retrieves the dictionary value at the specified position from the enum.\n\nThe result must be freed with `duckdb_free`\n\n type: The logical type object\n index: The index in the dictionary\n returns: The string value of the enum type. Must be freed with `duckdb_free`."] pub fn duckdb_enum_dictionary_value(type_: duckdb_logical_type, index: idx_t) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = "Retrieves the child type of the given list type."] - #[doc = ""] - #[doc = "The result must be freed with `duckdb_destroy_logical_type`"] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The child type of the list type. Must be destroyed with `duckdb_destroy_logical_type`."] + #[doc = "Retrieves the child type of the given list type.\n\nThe result must be freed with `duckdb_destroy_logical_type`\n\n type: The logical type object\n returns: The child type of the list type. Must be destroyed with `duckdb_destroy_logical_type`."] pub fn duckdb_list_type_child_type(type_: duckdb_logical_type) -> duckdb_logical_type; } extern "C" { - #[doc = "Retrieves the key type of the given map type."] - #[doc = ""] - #[doc = "The result must be freed with `duckdb_destroy_logical_type`"] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The key type of the map type. Must be destroyed with `duckdb_destroy_logical_type`."] + #[doc = "Retrieves the key type of the given map type.\n\nThe result must be freed with `duckdb_destroy_logical_type`\n\n type: The logical type object\n returns: The key type of the map type. Must be destroyed with `duckdb_destroy_logical_type`."] pub fn duckdb_map_type_key_type(type_: duckdb_logical_type) -> duckdb_logical_type; } extern "C" { - #[doc = "Retrieves the value type of the given map type."] - #[doc = ""] - #[doc = "The result must be freed with `duckdb_destroy_logical_type`"] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The value type of the map type. Must be destroyed with `duckdb_destroy_logical_type`."] + #[doc = "Retrieves the value type of the given map type.\n\nThe result must be freed with `duckdb_destroy_logical_type`\n\n type: The logical type object\n returns: The value type of the map type. Must be destroyed with `duckdb_destroy_logical_type`."] pub fn duckdb_map_type_value_type(type_: duckdb_logical_type) -> duckdb_logical_type; } extern "C" { - #[doc = "Returns the number of children of a struct type."] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " returns: The number of children of a struct type."] + #[doc = "Returns the number of children of a struct type.\n\n type: The logical type object\n returns: The number of children of a struct type."] pub fn duckdb_struct_type_child_count(type_: duckdb_logical_type) -> idx_t; } extern "C" { - #[doc = "Retrieves the name of the struct child."] - #[doc = ""] - #[doc = "The result must be freed with `duckdb_free`"] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " index: The child index"] - #[doc = " returns: The name of the struct type. 
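The logical-type constructors and accessors compose: a type built with `duckdb_create_decimal_type` or `duckdb_create_list_type` reports its properties back through the getters, and every created or returned type must be released with `duckdb_destroy_logical_type`. A sketch; the `DUCKDB_TYPE_DUCKDB_TYPE_*` constant names assume bindgen's doubled-prefix naming for C enums:

    use libduckdb_sys::*;

    unsafe fn type_introspection() {
        // DECIMAL(18, 3): width and scale round-trip through the accessors.
        let mut dec = duckdb_create_decimal_type(18, 3);
        assert_eq!(duckdb_get_type_id(dec), DUCKDB_TYPE_DUCKDB_TYPE_DECIMAL);
        assert_eq!(duckdb_decimal_width(dec), 18);
        assert_eq!(duckdb_decimal_scale(dec), 3);

        // LIST(BIGINT): the child type comes back as a fresh object to free.
        let mut bigint = duckdb_create_logical_type(DUCKDB_TYPE_DUCKDB_TYPE_BIGINT);
        let mut list = duckdb_create_list_type(bigint);
        let mut child = duckdb_list_type_child_type(list);
        assert_eq!(duckdb_get_type_id(child), DUCKDB_TYPE_DUCKDB_TYPE_BIGINT);

        duckdb_destroy_logical_type(&mut child);
        duckdb_destroy_logical_type(&mut list);
        duckdb_destroy_logical_type(&mut bigint);
        duckdb_destroy_logical_type(&mut dec);
    }
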
Must be freed with `duckdb_free`."] + #[doc = "Retrieves the name of the struct child.\n\nThe result must be freed with `duckdb_free`\n\n type: The logical type object\n index: The child index\n returns: The name of the struct type. Must be freed with `duckdb_free`."] pub fn duckdb_struct_type_child_name(type_: duckdb_logical_type, index: idx_t) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = "Retrieves the child type of the given struct type at the specified index."] - #[doc = ""] - #[doc = "The result must be freed with `duckdb_destroy_logical_type`"] - #[doc = ""] - #[doc = " type: The logical type object"] - #[doc = " index: The child index"] - #[doc = " returns: The child type of the struct type. Must be destroyed with `duckdb_destroy_logical_type`."] + #[doc = "Retrieves the child type of the given struct type at the specified index.\n\nThe result must be freed with `duckdb_destroy_logical_type`\n\n type: The logical type object\n index: The child index\n returns: The child type of the struct type. Must be destroyed with `duckdb_destroy_logical_type`."] pub fn duckdb_struct_type_child_type(type_: duckdb_logical_type, index: idx_t) -> duckdb_logical_type; } extern "C" { - #[doc = "Destroys the logical type and de-allocates all memory allocated for that type."] - #[doc = ""] - #[doc = " type: The logical type to destroy."] + #[doc = "Destroys the logical type and de-allocates all memory allocated for that type.\n\n type: The logical type to destroy."] pub fn duckdb_destroy_logical_type(type_: *mut duckdb_logical_type); } extern "C" { - #[doc = "Creates an empty DataChunk with the specified set of types."] - #[doc = ""] - #[doc = " types: An array of types of the data chunk."] - #[doc = " column_count: The number of columns."] - #[doc = " returns: The data chunk."] + #[doc = "Creates an empty DataChunk with the specified set of types.\n\n types: An array of types of the data chunk.\n column_count: The number of columns.\n returns: The data chunk."] pub fn duckdb_create_data_chunk(types: *mut duckdb_logical_type, column_count: idx_t) -> duckdb_data_chunk; } extern "C" { - #[doc = "Destroys the data chunk and de-allocates all memory allocated for that chunk."] - #[doc = ""] - #[doc = " chunk: The data chunk to destroy."] + #[doc = "Destroys the data chunk and de-allocates all memory allocated for that chunk.\n\n chunk: The data chunk to destroy."] pub fn duckdb_destroy_data_chunk(chunk: *mut duckdb_data_chunk); } extern "C" { - #[doc = "Resets a data chunk, clearing the validity masks and setting the cardinality of the data chunk to 0."] - #[doc = ""] - #[doc = " chunk: The data chunk to reset."] + #[doc = "Resets a data chunk, clearing the validity masks and setting the cardinality of the data chunk to 0.\n\n chunk: The data chunk to reset."] pub fn duckdb_data_chunk_reset(chunk: duckdb_data_chunk); } extern "C" { - #[doc = "Retrieves the number of columns in a data chunk."] - #[doc = ""] - #[doc = " chunk: The data chunk to get the data from"] - #[doc = " returns: The number of columns in the data chunk"] + #[doc = "Retrieves the number of columns in a data chunk.\n\n chunk: The data chunk to get the data from\n returns: The number of columns in the data chunk"] pub fn duckdb_data_chunk_get_column_count(chunk: duckdb_data_chunk) -> idx_t; } extern "C" { - #[doc = "Retrieves the vector at the specified column index in the data chunk."] - #[doc = ""] - #[doc = "The pointer to the vector is valid for as long as the chunk is alive."] - #[doc = "It does NOT need to be destroyed."] - 
#[doc = ""] - #[doc = " chunk: The data chunk to get the data from"] - #[doc = " returns: The vector"] + #[doc = "Retrieves the vector at the specified column index in the data chunk.\n\nThe pointer to the vector is valid for as long as the chunk is alive.\nIt does NOT need to be destroyed.\n\n chunk: The data chunk to get the data from\n returns: The vector"] pub fn duckdb_data_chunk_get_vector(chunk: duckdb_data_chunk, col_idx: idx_t) -> duckdb_vector; } extern "C" { - #[doc = "Retrieves the current number of tuples in a data chunk."] - #[doc = ""] - #[doc = " chunk: The data chunk to get the data from"] - #[doc = " returns: The number of tuples in the data chunk"] + #[doc = "Retrieves the current number of tuples in a data chunk.\n\n chunk: The data chunk to get the data from\n returns: The number of tuples in the data chunk"] pub fn duckdb_data_chunk_get_size(chunk: duckdb_data_chunk) -> idx_t; } extern "C" { - #[doc = "Sets the current number of tuples in a data chunk."] - #[doc = ""] - #[doc = " chunk: The data chunk to set the size in"] - #[doc = " size: The number of tuples in the data chunk"] + #[doc = "Sets the current number of tuples in a data chunk.\n\n chunk: The data chunk to set the size in\n size: The number of tuples in the data chunk"] pub fn duckdb_data_chunk_set_size(chunk: duckdb_data_chunk, size: idx_t); } extern "C" { - #[doc = "Retrieves the column type of the specified vector."] - #[doc = ""] - #[doc = "The result must be destroyed with `duckdb_destroy_logical_type`."] - #[doc = ""] - #[doc = " vector: The vector get the data from"] - #[doc = " returns: The type of the vector"] + #[doc = "Retrieves the column type of the specified vector.\n\nThe result must be destroyed with `duckdb_destroy_logical_type`.\n\n vector: The vector get the data from\n returns: The type of the vector"] pub fn duckdb_vector_get_column_type(vector: duckdb_vector) -> duckdb_logical_type; } extern "C" { - #[doc = "Retrieves the data pointer of the vector."] - #[doc = ""] - #[doc = "The data pointer can be used to read or write values from the vector."] - #[doc = "How to read or write values depends on the type of the vector."] - #[doc = ""] - #[doc = " vector: The vector to get the data from"] - #[doc = " returns: The data pointer"] + #[doc = "Retrieves the data pointer of the vector.\n\nThe data pointer can be used to read or write values from the vector.\nHow to read or write values depends on the type of the vector.\n\n vector: The vector to get the data from\n returns: The data pointer"] pub fn duckdb_vector_get_data(vector: duckdb_vector) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Retrieves the validity mask pointer of the specified vector."] - #[doc = ""] - #[doc = "If all values are valid, this function MIGHT return NULL!"] - #[doc = ""] - #[doc = "The validity mask is a bitset that signifies null-ness within the data chunk."] - #[doc = "It is a series of uint64_t values, where each uint64_t value contains validity for 64 tuples."] - #[doc = "The bit is set to 1 if the value is valid (i.e. not NULL) or 0 if the value is invalid (i.e. 
NULL)."] - #[doc = ""] - #[doc = "Validity of a specific value can be obtained like this:"] - #[doc = ""] - #[doc = "idx_t entry_idx = row_idx / 64;"] - #[doc = "idx_t idx_in_entry = row_idx % 64;"] - #[doc = "bool is_valid = validity_mask[entry_idx] & (1 << idx_in_entry);"] - #[doc = ""] - #[doc = "Alternatively, the (slower) duckdb_validity_row_is_valid function can be used."] - #[doc = ""] - #[doc = " vector: The vector to get the data from"] - #[doc = " returns: The pointer to the validity mask, or NULL if no validity mask is present"] + #[doc = "Retrieves the validity mask pointer of the specified vector.\n\nIf all values are valid, this function MIGHT return NULL!\n\nThe validity mask is a bitset that signifies null-ness within the data chunk.\nIt is a series of uint64_t values, where each uint64_t value contains validity for 64 tuples.\nThe bit is set to 1 if the value is valid (i.e. not NULL) or 0 if the value is invalid (i.e. NULL).\n\nValidity of a specific value can be obtained like this:\n\nidx_t entry_idx = row_idx / 64;\nidx_t idx_in_entry = row_idx % 64;\nbool is_valid = validity_mask[entry_idx] & (1 << idx_in_entry);\n\nAlternatively, the (slower) duckdb_validity_row_is_valid function can be used.\n\n vector: The vector to get the data from\n returns: The pointer to the validity mask, or NULL if no validity mask is present"] pub fn duckdb_vector_get_validity(vector: duckdb_vector) -> *mut u64; } extern "C" { - #[doc = "Ensures the validity mask is writable by allocating it."] - #[doc = ""] - #[doc = "After this function is called, `duckdb_vector_get_validity` will ALWAYS return non-NULL."] - #[doc = "This allows null values to be written to the vector, regardless of whether a validity mask was present before."] - #[doc = ""] - #[doc = " vector: The vector to alter"] + #[doc = "Ensures the validity mask is writable by allocating it.\n\nAfter this function is called, `duckdb_vector_get_validity` will ALWAYS return non-NULL.\nThis allows null values to be written to the vector, regardless of whether a validity mask was present before.\n\n vector: The vector to alter"] pub fn duckdb_vector_ensure_validity_writable(vector: duckdb_vector); } extern "C" { - #[doc = "Assigns a string element in the vector at the specified location."] - #[doc = ""] - #[doc = " vector: The vector to alter"] - #[doc = " index: The row position in the vector to assign the string to"] - #[doc = " str: The null-terminated string"] + #[doc = "Assigns a string element in the vector at the specified location.\n\n vector: The vector to alter\n index: The row position in the vector to assign the string to\n str: The null-terminated string"] pub fn duckdb_vector_assign_string_element( vector: duckdb_vector, index: idx_t, @@ -12209,12 +12289,7 @@ extern "C" { ); } extern "C" { - #[doc = "Assigns a string element in the vector at the specified location."] - #[doc = ""] - #[doc = " vector: The vector to alter"] - #[doc = " index: The row position in the vector to assign the string to"] - #[doc = " str: The string"] - #[doc = " str_len: The length of the string (in bytes)"] + #[doc = "Assigns a string element in the vector at the specified location.\n\n vector: The vector to alter\n index: The row position in the vector to assign the string to\n str: The string\n str_len: The length of the string (in bytes)"] pub fn duckdb_vector_assign_string_element_len( vector: duckdb_vector, index: idx_t, @@ -12223,66 +12298,31 @@ extern "C" { ); } extern "C" { - #[doc = "Retrieves the child vector of a list vector."] - #[doc 
= ""] - #[doc = "The resulting vector is valid as long as the parent vector is valid."] - #[doc = ""] - #[doc = " vector: The vector"] - #[doc = " returns: The child vector"] + #[doc = "Retrieves the child vector of a list vector.\n\nThe resulting vector is valid as long as the parent vector is valid.\n\n vector: The vector\n returns: The child vector"] pub fn duckdb_list_vector_get_child(vector: duckdb_vector) -> duckdb_vector; } extern "C" { - #[doc = "Returns the size of the child vector of the list"] - #[doc = ""] - #[doc = " vector: The vector"] - #[doc = " returns: The size of the child list"] + #[doc = "Returns the size of the child vector of the list\n\n vector: The vector\n returns: The size of the child list"] pub fn duckdb_list_vector_get_size(vector: duckdb_vector) -> idx_t; } extern "C" { - #[doc = "Retrieves the child vector of a struct vector."] - #[doc = ""] - #[doc = "The resulting vector is valid as long as the parent vector is valid."] - #[doc = ""] - #[doc = " vector: The vector"] - #[doc = " index: The child index"] - #[doc = " returns: The child vector"] + #[doc = "Retrieves the child vector of a struct vector.\n\nThe resulting vector is valid as long as the parent vector is valid.\n\n vector: The vector\n index: The child index\n returns: The child vector"] pub fn duckdb_struct_vector_get_child(vector: duckdb_vector, index: idx_t) -> duckdb_vector; } extern "C" { - #[doc = "Returns whether or not a row is valid (i.e. not NULL) in the given validity mask."] - #[doc = ""] - #[doc = " validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`"] - #[doc = " row: The row index"] - #[doc = " returns: true if the row is valid, false otherwise"] + #[doc = "Returns whether or not a row is valid (i.e. not NULL) in the given validity mask.\n\n validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`\n row: The row index\n returns: true if the row is valid, false otherwise"] pub fn duckdb_validity_row_is_valid(validity: *mut u64, row: idx_t) -> bool; } extern "C" { - #[doc = "In a validity mask, sets a specific row to either valid or invalid."] - #[doc = ""] - #[doc = "Note that `duckdb_data_chunk_ensure_validity_writable` should be called before calling `duckdb_data_chunk_get_validity`,"] - #[doc = "to ensure that there is a validity mask to write to."] - #[doc = ""] - #[doc = " validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`."] - #[doc = " row: The row index"] - #[doc = " valid: Whether or not to set the row to valid, or invalid"] + #[doc = "In a validity mask, sets a specific row to either valid or invalid.\n\nNote that `duckdb_data_chunk_ensure_validity_writable` should be called before calling `duckdb_data_chunk_get_validity`,\nto ensure that there is a validity mask to write to.\n\n validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`.\n row: The row index\n valid: Whether or not to set the row to valid, or invalid"] pub fn duckdb_validity_set_row_validity(validity: *mut u64, row: idx_t, valid: bool); } extern "C" { - #[doc = "In a validity mask, sets a specific row to invalid."] - #[doc = ""] - #[doc = "Equivalent to `duckdb_validity_set_row_validity` with valid set to false."] - #[doc = ""] - #[doc = " validity: The validity mask"] - #[doc = " row: The row index"] + #[doc = "In a validity mask, sets a specific row to invalid.\n\nEquivalent to `duckdb_validity_set_row_validity` with valid set to false.\n\n validity: The validity mask\n row: The row index"] pub fn 
duckdb_validity_set_row_invalid(validity: *mut u64, row: idx_t); } extern "C" { - #[doc = "In a validity mask, sets a specific row to valid."] - #[doc = ""] - #[doc = "Equivalent to `duckdb_validity_set_row_validity` with valid set to true."] - #[doc = ""] - #[doc = " validity: The validity mask"] - #[doc = " row: The row index"] + #[doc = "In a validity mask, sets a specific row to valid.\n\nEquivalent to `duckdb_validity_set_row_validity` with valid set to true.\n\n validity: The validity mask\n row: The row index"] pub fn duckdb_validity_set_row_valid(validity: *mut u64, row: idx_t); } pub type duckdb_table_function = *mut ::std::os::raw::c_void; @@ -12295,39 +12335,23 @@ pub type duckdb_table_function_t = ::std::option::Option; pub type duckdb_delete_callback_t = ::std::option::Option; extern "C" { - #[doc = "Creates a new empty table function."] - #[doc = ""] - #[doc = "The return value should be destroyed with `duckdb_destroy_table_function`."] - #[doc = ""] - #[doc = " returns: The table function object."] + #[doc = "Creates a new empty table function.\n\nThe return value should be destroyed with `duckdb_destroy_table_function`.\n\n returns: The table function object."] pub fn duckdb_create_table_function() -> duckdb_table_function; } extern "C" { - #[doc = "Destroys the given table function object."] - #[doc = ""] - #[doc = " table_function: The table function to destroy"] + #[doc = "Destroys the given table function object.\n\n table_function: The table function to destroy"] pub fn duckdb_destroy_table_function(table_function: *mut duckdb_table_function); } extern "C" { - #[doc = "Sets the name of the given table function."] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " name: The name of the table function"] + #[doc = "Sets the name of the given table function.\n\n table_function: The table function\n name: The name of the table function"] pub fn duckdb_table_function_set_name(table_function: duckdb_table_function, name: *const ::std::os::raw::c_char); } extern "C" { - #[doc = "Adds a parameter to the table function."] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " type: The type of the parameter to add."] + #[doc = "Adds a parameter to the table function.\n\n table_function: The table function\n type: The type of the parameter to add."] pub fn duckdb_table_function_add_parameter(table_function: duckdb_table_function, type_: duckdb_logical_type); } extern "C" { - #[doc = "Assigns extra information to the table function that can be fetched during binding, etc."] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " extra_info: The extra information"] - #[doc = " destroy: The callback that will be called to destroy the bind data (if any)"] + #[doc = "Assigns extra information to the table function that can be fetched during binding, etc.\n\n table_function: The table function\n extra_info: The extra information\n destroy: The callback that will be called to destroy the bind data (if any)"] pub fn duckdb_table_function_set_extra_info( table_function: duckdb_table_function, extra_info: *mut ::std::os::raw::c_void, @@ -12335,72 +12359,38 @@ extern "C" { ); } extern "C" { - #[doc = "Sets the bind function of the table function"] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " bind: The bind function"] + #[doc = "Sets the bind function of the table function\n\n table_function: The table function\n bind: The bind function"] pub fn duckdb_table_function_set_bind(table_function: 
duckdb_table_function, bind: duckdb_table_function_bind_t); } extern "C" { - #[doc = "Sets the init function of the table function"] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " init: The init function"] + #[doc = "Sets the init function of the table function\n\n table_function: The table function\n init: The init function"] pub fn duckdb_table_function_set_init(table_function: duckdb_table_function, init: duckdb_table_function_init_t); } extern "C" { - #[doc = "Sets the thread-local init function of the table function"] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " init: The init function"] + #[doc = "Sets the thread-local init function of the table function\n\n table_function: The table function\n init: The init function"] pub fn duckdb_table_function_set_local_init( table_function: duckdb_table_function, init: duckdb_table_function_init_t, ); } extern "C" { - #[doc = "Sets the main function of the table function"] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " function: The function"] + #[doc = "Sets the main function of the table function\n\n table_function: The table function\n function: The function"] pub fn duckdb_table_function_set_function(table_function: duckdb_table_function, function: duckdb_table_function_t); } extern "C" { - #[doc = "Sets whether or not the given table function supports projection pushdown."] - #[doc = ""] - #[doc = "If this is set to true, the system will provide a list of all required columns in the `init` stage through"] - #[doc = "the `duckdb_init_get_column_count` and `duckdb_init_get_column_index` functions."] - #[doc = "If this is set to false (the default), the system will expect all columns to be projected."] - #[doc = ""] - #[doc = " table_function: The table function"] - #[doc = " pushdown: True if the table function supports projection pushdown, false otherwise."] + #[doc = "Sets whether or not the given table function supports projection pushdown.\n\nIf this is set to true, the system will provide a list of all required columns in the `init` stage through\nthe `duckdb_init_get_column_count` and `duckdb_init_get_column_index` functions.\nIf this is set to false (the default), the system will expect all columns to be projected.\n\n table_function: The table function\n pushdown: True if the table function supports projection pushdown, false otherwise."] pub fn duckdb_table_function_supports_projection_pushdown(table_function: duckdb_table_function, pushdown: bool); } extern "C" { - #[doc = "Register the table function object within the given connection."] - #[doc = ""] - #[doc = "The function requires at least a name, a bind function, an init function and a main function."] - #[doc = ""] - #[doc = "If the function is incomplete or a function with this name already exists DuckDBError is returned."] - #[doc = ""] - #[doc = " con: The connection to register it in."] - #[doc = " function: The function pointer"] - #[doc = " returns: Whether or not the registration was successful."] + #[doc = "Register the table function object within the given connection.\n\nThe function requires at least a name, a bind function, an init function and a main function.\n\nIf the function is incomplete or a function with this name already exists DuckDBError is returned.\n\n con: The connection to register it in.\n function: The function pointer\n returns: Whether or not the registration was successful."] pub fn duckdb_register_table_function(con: duckdb_connection, function: 
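Registering a table function strings the setters above together: create, name, attach bind/init/function callbacks, register, destroy. A compact sketch of a zero-argument `ones()` function that emits three BIGINT rows; the callback shapes (`unsafe extern "C" fn(duckdb_bind_info)` and so on) follow the typedefs in this header, the enum constant naming is assumed as before, and destroying the function object after registration assumes the registration takes its own copy:

    use libduckdb_sys::*;
    use std::ffi::CString;
    use std::os::raw::c_void;

    unsafe extern "C" fn drop_box_u64(p: *mut c_void) {
        drop(Box::from_raw(p as *mut u64)); // delete callback for init data
    }

    unsafe extern "C" fn ones_bind(info: duckdb_bind_info) {
        // One BIGINT result column named "i".
        let name = CString::new("i").unwrap();
        let mut ty = duckdb_create_logical_type(DUCKDB_TYPE_DUCKDB_TYPE_BIGINT);
        duckdb_bind_add_result_column(info, name.as_ptr(), ty);
        duckdb_destroy_logical_type(&mut ty);
    }

    unsafe extern "C" fn ones_init(info: duckdb_init_info) {
        // Per-query state: how many rows have been produced so far.
        let produced = Box::into_raw(Box::new(0u64));
        duckdb_init_set_init_data(info, produced as *mut c_void, Some(drop_box_u64));
    }

    unsafe extern "C" fn ones_func(info: duckdb_function_info, output: duckdb_data_chunk) {
        let produced = duckdb_function_get_init_data(info) as *mut u64;
        if *produced >= 3 {
            duckdb_data_chunk_set_size(output, 0); // empty chunk signals completion
            return;
        }
        let vec = duckdb_data_chunk_get_vector(output, 0);
        let data = duckdb_vector_get_data(vec) as *mut i64;
        for row in 0..3 {
            *data.add(row) = 1;
        }
        *produced += 3;
        duckdb_data_chunk_set_size(output, 3);
    }

    unsafe fn register_ones(con: duckdb_connection) {
        let mut tf = duckdb_create_table_function();
        let name = CString::new("ones").unwrap();
        duckdb_table_function_set_name(tf, name.as_ptr());
        duckdb_table_function_set_bind(tf, Some(ones_bind));
        duckdb_table_function_set_init(tf, Some(ones_init));
        duckdb_table_function_set_function(tf, Some(ones_func));
        // Fails with DuckDBError if a callback is missing or the name is taken.
        assert_eq!(duckdb_register_table_function(con, tf), duckdb_state_DuckDBSuccess);
        duckdb_destroy_table_function(&mut tf);
    }
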
duckdb_table_function) -> duckdb_state; } extern "C" { - #[doc = "Retrieves the extra info of the function as set in `duckdb_table_function_set_extra_info`"] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The extra info"] + #[doc = "Retrieves the extra info of the function as set in `duckdb_table_function_set_extra_info`\n\n info: The info object\n returns: The extra info"] pub fn duckdb_bind_get_extra_info(info: duckdb_bind_info) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Adds a result column to the output of the table function."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " name: The name of the column"] - #[doc = " type: The logical type of the column"] + #[doc = "Adds a result column to the output of the table function.\n\n info: The info object\n name: The name of the column\n type: The logical type of the column"] pub fn duckdb_bind_add_result_column( info: duckdb_bind_info, name: *const ::std::os::raw::c_char, @@ -12408,28 +12398,15 @@ extern "C" { ); } extern "C" { - #[doc = "Retrieves the number of regular (non-named) parameters to the function."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The number of parameters"] + #[doc = "Retrieves the number of regular (non-named) parameters to the function.\n\n info: The info object\n returns: The number of parameters"] pub fn duckdb_bind_get_parameter_count(info: duckdb_bind_info) -> idx_t; } extern "C" { - #[doc = "Retrieves the parameter at the given index."] - #[doc = ""] - #[doc = "The result must be destroyed with `duckdb_destroy_value`."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " index: The index of the parameter to get"] - #[doc = " returns: The value of the parameter. Must be destroyed with `duckdb_destroy_value`."] + #[doc = "Retrieves the parameter at the given index.\n\nThe result must be destroyed with `duckdb_destroy_value`.\n\n info: The info object\n index: The index of the parameter to get\n returns: The value of the parameter. Must be destroyed with `duckdb_destroy_value`."] pub fn duckdb_bind_get_parameter(info: duckdb_bind_info, index: idx_t) -> duckdb_value; } extern "C" { - #[doc = "Sets the user-provided bind data in the bind object. This object can be retrieved again during execution."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " extra_data: The bind data object."] - #[doc = " destroy: The callback that will be called to destroy the bind data (if any)"] + #[doc = "Sets the user-provided bind data in the bind object. 
This object can be retrieved again during execution.\n\n info: The info object\n extra_data: The bind data object.\n destroy: The callback that will be called to destroy the bind data (if any)"] pub fn duckdb_bind_set_bind_data( info: duckdb_bind_info, bind_data: *mut ::std::os::raw::c_void, @@ -12437,42 +12414,23 @@ extern "C" { ); } extern "C" { - #[doc = "Sets the cardinality estimate for the table function, used for optimization."] - #[doc = ""] - #[doc = " info: The bind data object."] - #[doc = " is_exact: Whether or not the cardinality estimate is exact, or an approximation"] + #[doc = "Sets the cardinality estimate for the table function, used for optimization.\n\n info: The bind data object.\n is_exact: Whether or not the cardinality estimate is exact, or an approximation"] pub fn duckdb_bind_set_cardinality(info: duckdb_bind_info, cardinality: idx_t, is_exact: bool); } extern "C" { - #[doc = "Report that an error has occurred while calling bind."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " error: The error message"] + #[doc = "Report that an error has occurred while calling bind.\n\n info: The info object\n error: The error message"] pub fn duckdb_bind_set_error(info: duckdb_bind_info, error: *const ::std::os::raw::c_char); } extern "C" { - #[doc = "Retrieves the extra info of the function as set in `duckdb_table_function_set_extra_info`"] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The extra info"] + #[doc = "Retrieves the extra info of the function as set in `duckdb_table_function_set_extra_info`\n\n info: The info object\n returns: The extra info"] pub fn duckdb_init_get_extra_info(info: duckdb_init_info) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Gets the bind data set by `duckdb_bind_set_bind_data` during the bind."] - #[doc = ""] - #[doc = "Note that the bind data should be considered as read-only."] - #[doc = "For tracking state, use the init data instead."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The bind data object"] + #[doc = "Gets the bind data set by `duckdb_bind_set_bind_data` during the bind.\n\nNote that the bind data should be considered as read-only.\nFor tracking state, use the init data instead.\n\n info: The info object\n returns: The bind data object"] pub fn duckdb_init_get_bind_data(info: duckdb_init_info) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Sets the user-provided init data in the init object. This object can be retrieved again during execution."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " extra_data: The init data object."] - #[doc = " destroy: The callback that will be called to destroy the init data (if any)"] + #[doc = "Sets the user-provided init data in the init object. 
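A bind callback typically combines several of these calls: read the function's parameters, publish the result schema, stash bind data for execution, and optionally hand the optimizer a cardinality estimate. A hedged sketch with a hypothetical single `row_count` parameter; parameter indices here are assumed 0-based, and `drop_box_i64` mirrors the delete callback in the table-function sketch above:

    use libduckdb_sys::*;
    use std::ffi::CString;
    use std::os::raw::c_void;

    unsafe extern "C" fn drop_box_i64(p: *mut c_void) {
        drop(Box::from_raw(p as *mut i64));
    }

    unsafe extern "C" fn counted_bind(info: duckdb_bind_info) {
        // First positional parameter: how many rows to produce.
        let mut param = duckdb_bind_get_parameter(info, 0);
        let n = duckdb_get_int64(param);
        duckdb_destroy_value(&mut param);
        if n < 0 {
            let msg = CString::new("row count must be non-negative").unwrap();
            duckdb_bind_set_error(info, msg.as_ptr());
            return;
        }
        let name = CString::new("i").unwrap();
        let mut ty = duckdb_create_logical_type(DUCKDB_TYPE_DUCKDB_TYPE_BIGINT);
        duckdb_bind_add_result_column(info, name.as_ptr(), ty);
        duckdb_destroy_logical_type(&mut ty);
        // The requested count is needed again at execution time.
        duckdb_bind_set_bind_data(info, Box::into_raw(Box::new(n)) as *mut c_void, Some(drop_box_i64));
        // The exact cardinality is known up front, which helps the planner.
        duckdb_bind_set_cardinality(info, n as u64, true);
    }
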
This object can be retrieved again during execution.\n\n info: The info object\n extra_data: The init data object.\n destroy: The callback that will be called to destroy the init data (if any)"] pub fn duckdb_init_set_init_data( info: duckdb_init_info, init_data: *mut ::std::os::raw::c_void, @@ -12480,74 +12438,39 @@ extern "C" { ); } extern "C" { - #[doc = "Returns the number of projected columns."] - #[doc = ""] - #[doc = "This function must be used if projection pushdown is enabled to figure out which columns to emit."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The number of projected columns."] + #[doc = "Returns the number of projected columns.\n\nThis function must be used if projection pushdown is enabled to figure out which columns to emit.\n\n info: The info object\n returns: The number of projected columns."] pub fn duckdb_init_get_column_count(info: duckdb_init_info) -> idx_t; } extern "C" { - #[doc = "Returns the column index of the projected column at the specified position."] - #[doc = ""] - #[doc = "This function must be used if projection pushdown is enabled to figure out which columns to emit."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " column_index: The index at which to get the projected column index, from 0..duckdb_init_get_column_count(info)"] - #[doc = " returns: The column index of the projected column."] + #[doc = "Returns the column index of the projected column at the specified position.\n\nThis function must be used if projection pushdown is enabled to figure out which columns to emit.\n\n info: The info object\n column_index: The index at which to get the projected column index, from 0..duckdb_init_get_column_count(info)\n returns: The column index of the projected column."] pub fn duckdb_init_get_column_index(info: duckdb_init_info, column_index: idx_t) -> idx_t; } extern "C" { - #[doc = "Sets how many threads can process this table function in parallel (default: 1)"] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " max_threads: The maximum amount of threads that can process this table function"] + #[doc = "Sets how many threads can process this table function in parallel (default: 1)\n\n info: The info object\n max_threads: The maximum amount of threads that can process this table function"] pub fn duckdb_init_set_max_threads(info: duckdb_init_info, max_threads: idx_t); } extern "C" { - #[doc = "Report that an error has occurred while calling init."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " error: The error message"] + #[doc = "Report that an error has occurred while calling init.\n\n info: The info object\n error: The error message"] pub fn duckdb_init_set_error(info: duckdb_init_info, error: *const ::std::os::raw::c_char); } extern "C" { - #[doc = "Retrieves the extra info of the function as set in `duckdb_table_function_set_extra_info`"] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The extra info"] + #[doc = "Retrieves the extra info of the function as set in `duckdb_table_function_set_extra_info`\n\n info: The info object\n returns: The extra info"] pub fn duckdb_function_get_extra_info(info: duckdb_function_info) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Gets the bind data set by `duckdb_bind_set_bind_data` during the bind."] - #[doc = ""] - #[doc = "Note that the bind data should be considered as read-only."] - #[doc = "For tracking state, use the init data instead."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " 
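When projection pushdown is enabled, the init callback is where the projected column set arrives. A sketch of collecting the projected indices and widening parallelism, exactly as the docs above describe:

    use libduckdb_sys::*;

    unsafe extern "C" fn pushdown_init(info: duckdb_init_info) {
        // Which columns the query actually needs, in output order.
        let mut projected = Vec::new();
        for pos in 0..duckdb_init_get_column_count(info) {
            projected.push(duckdb_init_get_column_index(info, pos));
        }
        // `projected` would normally be stored via duckdb_init_set_init_data
        // so the main callback emits only these columns.
        let _ = projected;
        // Allow up to 4 threads to drive this scan in parallel (default: 1).
        duckdb_init_set_max_threads(info, 4);
    }
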
returns: The bind data object"] + #[doc = "Gets the bind data set by `duckdb_bind_set_bind_data` during the bind.\n\nNote that the bind data should be considered as read-only.\nFor tracking state, use the init data instead.\n\n info: The info object\n returns: The bind data object"] pub fn duckdb_function_get_bind_data(info: duckdb_function_info) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Gets the init data set by `duckdb_init_set_init_data` during the init."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The init data object"] + #[doc = "Gets the init data set by `duckdb_init_set_init_data` during the init.\n\n info: The info object\n returns: The init data object"] pub fn duckdb_function_get_init_data(info: duckdb_function_info) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Gets the thread-local init data set by `duckdb_init_set_init_data` during the local_init."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " returns: The init data object"] + #[doc = "Gets the thread-local init data set by `duckdb_init_set_init_data` during the local_init.\n\n info: The info object\n returns: The init data object"] pub fn duckdb_function_get_local_init_data(info: duckdb_function_info) -> *mut ::std::os::raw::c_void; } extern "C" { - #[doc = "Report that an error has occurred while executing the function."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " error: The error message"] + #[doc = "Report that an error has occurred while executing the function.\n\n info: The info object\n error: The error message"] pub fn duckdb_function_set_error(info: duckdb_function_info, error: *const ::std::os::raw::c_char); } pub type duckdb_replacement_scan_info = *mut ::std::os::raw::c_void; @@ -12559,12 +12482,7 @@ pub type duckdb_replacement_callback_t = ::std::option::Option< ), >; extern "C" { - #[doc = "Add a replacement scan definition to the specified database"] - #[doc = ""] - #[doc = " db: The database object to add the replacement scan to"] - #[doc = " replacement: The replacement scan callback"] - #[doc = " extra_data: Extra data that is passed back into the specified callback"] - #[doc = " delete_callback: The delete callback to call on the extra data, if any"] + #[doc = "Add a replacement scan definition to the specified database\n\n db: The database object to add the replacement scan to\n replacement: The replacement scan callback\n extra_data: Extra data that is passed back into the specified callback\n delete_callback: The delete callback to call on the extra data, if any"] pub fn duckdb_add_replacement_scan( db: duckdb_database, replacement: duckdb_replacement_callback_t, @@ -12573,38 +12491,22 @@ extern "C" { ); } extern "C" { - #[doc = "Sets the replacement function name to use. If this function is called in the replacement callback,"] - #[doc = "the replacement scan is performed. If it is not called, the replacement callback is not performed."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " function_name: The function name to substitute."] + #[doc = "Sets the replacement function name to use. If this function is called in the replacement callback,\nthe replacement scan is performed. 
If it is not called, the replacement callback is not performed.\n\n info: The info object\n function_name: The function name to substitute."] pub fn duckdb_replacement_scan_set_function_name( info: duckdb_replacement_scan_info, function_name: *const ::std::os::raw::c_char, ); } extern "C" { - #[doc = "Adds a parameter to the replacement scan function."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " parameter: The parameter to add."] + #[doc = "Adds a parameter to the replacement scan function.\n\n info: The info object\n parameter: The parameter to add."] pub fn duckdb_replacement_scan_add_parameter(info: duckdb_replacement_scan_info, parameter: duckdb_value); } extern "C" { - #[doc = "Report that an error has occurred while executing the replacement scan."] - #[doc = ""] - #[doc = " info: The info object"] - #[doc = " error: The error message"] + #[doc = "Report that an error has occurred while executing the replacement scan.\n\n info: The info object\n error: The error message"] pub fn duckdb_replacement_scan_set_error(info: duckdb_replacement_scan_info, error: *const ::std::os::raw::c_char); } extern "C" { - #[doc = "Creates an appender object."] - #[doc = ""] - #[doc = " connection: The connection context to create the appender in."] - #[doc = " schema: The schema of the table to append to, or `nullptr` for the default schema."] - #[doc = " table: The table name to append to."] - #[doc = " out_appender: The resulting appender object."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Creates an appender object.\n\n connection: The connection context to create the appender in.\n schema: The schema of the table to append to, or `nullptr` for the default schema.\n table: The table name to append to.\n out_appender: The resulting appender object.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_appender_create( connection: duckdb_connection, schema: *const ::std::os::raw::c_char, @@ -12613,41 +12515,19 @@ extern "C" { ) -> duckdb_state; } extern "C" { - #[doc = "Returns the error message associated with the given appender."] - #[doc = "If the appender has no error message, this returns `nullptr` instead."] - #[doc = ""] - #[doc = "The error message should not be freed. It will be de-allocated when `duckdb_appender_destroy` is called."] - #[doc = ""] - #[doc = " appender: The appender to get the error from."] - #[doc = " returns: The error message, or `nullptr` if there is none."] + #[doc = "Returns the error message associated with the given appender.\nIf the appender has no error message, this returns `nullptr` instead.\n\nThe error message should not be freed. It will be de-allocated when `duckdb_appender_destroy` is called.\n\n appender: The appender to get the error from.\n returns: The error message, or `nullptr` if there is none."] pub fn duckdb_appender_error(appender: duckdb_appender) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = "Flush the appender to the table, forcing the cache of the appender to be cleared and the data to be appended to the"] - #[doc = "base table."] - #[doc = ""] - #[doc = "This should generally not be used unless you know what you are doing. 
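A replacement scan callback inspects the unresolved table name and, if it wants to handle it, names a substitute table function and supplies its parameters; if it never calls `duckdb_replacement_scan_set_function_name`, resolution falls through unchanged. A sketch that rewrites names ending in `.csv` into a call to a hypothetical `read_csv` function; the `(info, table_name, data)` callback shape follows the `duckdb_replacement_callback_t` typedef, and destroying the parameter value after `duckdb_replacement_scan_add_parameter` assumes the value is copied, as with other `duckdb_value` sinks:

    use libduckdb_sys::*;
    use std::ffi::{CStr, CString};
    use std::os::raw::{c_char, c_void};
    use std::ptr;

    unsafe extern "C" fn csv_replacement(
        info: duckdb_replacement_scan_info,
        table_name: *const c_char,
        _data: *mut c_void,
    ) {
        let name = CStr::from_ptr(table_name).to_string_lossy();
        if !name.ends_with(".csv") {
            return; // not ours: no function name set, so no replacement happens
        }
        let func = CString::new("read_csv").unwrap();
        duckdb_replacement_scan_set_function_name(info, func.as_ptr());
        // Pass the original name through as the function's first argument.
        let arg = CString::new(name.as_ref()).unwrap();
        let mut val = duckdb_create_varchar(arg.as_ptr());
        duckdb_replacement_scan_add_parameter(info, val);
        duckdb_destroy_value(&mut val);
    }

    unsafe fn install(db: duckdb_database) {
        duckdb_add_replacement_scan(db, Some(csv_replacement), ptr::null_mut(), None);
    }
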
Instead, call `duckdb_appender_destroy` when you"] - #[doc = "are done with the appender."] - #[doc = ""] - #[doc = " appender: The appender to flush."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Flush the appender to the table, forcing the cache of the appender to be cleared and the data to be appended to the\nbase table.\n\nThis should generally not be used unless you know what you are doing. Instead, call `duckdb_appender_destroy` when you\nare done with the appender.\n\n appender: The appender to flush.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_appender_flush(appender: duckdb_appender) -> duckdb_state; } extern "C" { - #[doc = "Close the appender, flushing all intermediate state in the appender to the table and closing it for further appends."] - #[doc = ""] - #[doc = "This is generally not necessary. Call `duckdb_appender_destroy` instead."] - #[doc = ""] - #[doc = " appender: The appender to flush and close."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Close the appender, flushing all intermediate state in the appender to the table and closing it for further appends.\n\nThis is generally not necessary. Call `duckdb_appender_destroy` instead.\n\n appender: The appender to flush and close.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_appender_close(appender: duckdb_appender) -> duckdb_state; } extern "C" { - #[doc = "Close the appender and destroy it. Flushing all intermediate state in the appender to the table, and de-allocating"] - #[doc = "all memory associated with the appender."] - #[doc = ""] - #[doc = " appender: The appender to flush, close and destroy."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Close the appender and destroy it. Flushing all intermediate state in the appender to the table, and de-allocating\nall memory associated with the appender.\n\n appender: The appender to flush, close and destroy.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_appender_destroy(appender: *mut duckdb_appender) -> duckdb_state; } extern "C" { @@ -12655,10 +12535,7 @@ extern "C" { pub fn duckdb_appender_begin_row(appender: duckdb_appender) -> duckdb_state; } extern "C" { - #[doc = "Finish the current row of appends. After end_row is called, the next row can be appended."] - #[doc = ""] - #[doc = " appender: The appender."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Finish the current row of appends. 
extern "C" { - #[doc = "Finish the current row of appends. After end_row is called, the next row can be appended."] - #[doc = ""] - #[doc = " appender: The appender."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Finish the current row of appends. After end_row is called, the next row can be appended.\n\n appender: The appender.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_appender_end_row(appender: duckdb_appender) -> duckdb_state; } extern "C" { @@ -12750,29 +12627,11 @@ extern "C" { pub fn duckdb_append_null(appender: duckdb_appender) -> duckdb_state; } extern "C" { - #[doc = "Appends a pre-filled data chunk to the specified appender."] - #[doc = ""] - #[doc = "The types of the data chunk must exactly match the types of the table, no casting is performed."] - #[doc = "If the types do not match or the appender is in an invalid state, DuckDBError is returned."] - #[doc = "If the append is successful, DuckDBSuccess is returned."] - #[doc = ""] - #[doc = " appender: The appender to append to."] - #[doc = " chunk: The data chunk to append."] - #[doc = " returns: The return state."] + #[doc = "Appends a pre-filled data chunk to the specified appender.\n\nThe types of the data chunk must exactly match the types of the table, no casting is performed.\nIf the types do not match or the appender is in an invalid state, DuckDBError is returned.\nIf the append is successful, DuckDBSuccess is returned.\n\n appender: The appender to append to.\n chunk: The data chunk to append.\n returns: The return state."] pub fn duckdb_append_data_chunk(appender: duckdb_appender, chunk: duckdb_data_chunk) -> duckdb_state; } extern "C" { - #[doc = "Executes a SQL query within a connection and stores the full (materialized) result in an arrow structure."] - #[doc = "If the query fails to execute, DuckDBError is returned and the error message can be retrieved by calling"] - #[doc = "`duckdb_query_arrow_error`."] - #[doc = ""] - #[doc = "Note that after running `duckdb_query_arrow`, `duckdb_destroy_arrow` must be called on the result object even if the"] - #[doc = "query fails, otherwise the error stored within the result will not be freed correctly."] - #[doc = ""] - #[doc = " connection: The connection to perform the query in."] - #[doc = " query: The SQL query to run."] - #[doc = " out_result: The query result."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Executes a SQL query within a connection and stores the full (materialized) result in an arrow structure.\nIf the query fails to execute, DuckDBError is returned and the error message can be retrieved by calling\n`duckdb_query_arrow_error`.\n\nNote that after running `duckdb_query_arrow`, `duckdb_destroy_arrow` must be called on the result object even if the\nquery fails, otherwise the error stored within the result will not be freed correctly.\n\n connection: The connection to perform the query in.\n query: The SQL query to run.\n out_result: The query result.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_query_arrow( connection: duckdb_connection, query: *const ::std::os::raw::c_char, @@ -12780,124 +12639,60 @@ extern "C" { ) -> duckdb_state; } extern "C" { - #[doc = "Fetch the internal arrow schema from the arrow result."] - #[doc = ""] - #[doc = " result: The result to fetch the schema from."] - #[doc = " out_schema: The output schema."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Fetch the internal arrow schema from the arrow result.\n\n result: The result to fetch the schema from.\n out_schema: The output schema.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_query_arrow_schema(result: duckdb_arrow,
out_schema: *mut duckdb_arrow_schema) -> duckdb_state; } extern "C" { - #[doc = "Fetch an internal arrow array from the arrow result."] - #[doc = ""] - #[doc = "This function can be called multiple time to get next chunks, which will free the previous out_array."] - #[doc = "So consume the out_array before calling this function again."] - #[doc = ""] - #[doc = " result: The result to fetch the array from."] - #[doc = " out_array: The output array."] - #[doc = " returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] + #[doc = "Fetch an internal arrow array from the arrow result.\n\nThis function can be called multiple times to get the next chunks, which will free the previous out_array.\nSo consume the out_array before calling this function again.\n\n result: The result to fetch the array from.\n out_array: The output array.\n returns: `DuckDBSuccess` on success or `DuckDBError` on failure."] pub fn duckdb_query_arrow_array(result: duckdb_arrow, out_array: *mut duckdb_arrow_array) -> duckdb_state; } extern "C" { - #[doc = "Returns the number of columns present in a the arrow result object."] - #[doc = ""] - #[doc = " result: The result object."] - #[doc = " returns: The number of columns present in the result object."] + #[doc = "Returns the number of columns present in the arrow result object.\n\n result: The result object.\n returns: The number of columns present in the result object."] pub fn duckdb_arrow_column_count(result: duckdb_arrow) -> idx_t; } extern "C" { - #[doc = "Returns the number of rows present in a the arrow result object."] - #[doc = ""] - #[doc = " result: The result object."] - #[doc = " returns: The number of rows present in the result object."] + #[doc = "Returns the number of rows present in the arrow result object.\n\n result: The result object.\n returns: The number of rows present in the result object."] pub fn duckdb_arrow_row_count(result: duckdb_arrow) -> idx_t; } extern "C" { - #[doc = "Returns the number of rows changed by the query stored in the arrow result. This is relevant only for"] - #[doc = "INSERT/UPDATE/DELETE queries. For other queries the rows_changed will be 0."] - #[doc = ""] - #[doc = " result: The result object."] - #[doc = " returns: The number of rows changed."] + #[doc = "Returns the number of rows changed by the query stored in the arrow result. This is relevant only for\nINSERT/UPDATE/DELETE queries. For other queries the rows_changed will be 0.\n\n result: The result object.\n returns: The number of rows changed."] pub fn duckdb_arrow_rows_changed(result: duckdb_arrow) -> idx_t; }
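A hedged sketch of the materialized-arrow workflow these bindings describe, reusing the `duckdb_open`/`duckdb_connect` setup from the appender sketch above (`duckdb_query_arrow_error` and `duckdb_destroy_arrow` are declared immediately below; illustrative only):

use libduckdb_sys::*;
use std::ffi::{CStr, CString};
use std::ptr;

// `con` is an open duckdb_connection, e.g. from the appender sketch above.
unsafe fn arrow_row_count(con: duckdb_connection) -> Option<idx_t> {
    let sql = CString::new("SELECT 42 AS answer").unwrap();
    let mut result: duckdb_arrow = ptr::null_mut();
    let state = duckdb_query_arrow(con, sql.as_ptr(), &mut result);
    let rows = if state == duckdb_state_DuckDBSuccess {
        Some(duckdb_arrow_row_count(result))
    } else {
        // The error string is owned by the result; it must not be freed directly.
        let msg = CStr::from_ptr(duckdb_query_arrow_error(result));
        eprintln!("query failed: {}", msg.to_string_lossy());
        None
    };
    // Required even on failure, otherwise the error stored in the result leaks.
    // duckdb_query_arrow_schema/duckdb_query_arrow_array follow the same pattern
    // but write into caller-allocated Arrow C-ABI structs, elided here.
    duckdb_destroy_arrow(&mut result);
    rows
}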
extern "C" { - #[doc = "Returns the error message contained within the result. The error is only set if `duckdb_query_arrow` returns"] - #[doc = "`DuckDBError`."] - #[doc = ""] - #[doc = "The error message should not be freed. It will be de-allocated when `duckdb_destroy_arrow` is called."] - #[doc = ""] - #[doc = " result: The result object to fetch the nullmask from."] - #[doc = " returns: The error of the result."] + #[doc = "Returns the error message contained within the result. The error is only set if `duckdb_query_arrow` returns\n`DuckDBError`.\n\nThe error message should not be freed. It will be de-allocated when `duckdb_destroy_arrow` is called.\n\n result: The result object to fetch the error from.\n returns: The error of the result."] pub fn duckdb_query_arrow_error(result: duckdb_arrow) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = "Closes the result and de-allocates all memory allocated for the arrow result."] - #[doc = ""] - #[doc = " result: The result to destroy."] + #[doc = "Closes the result and de-allocates all memory allocated for the arrow result.\n\n result: The result to destroy."] pub fn duckdb_destroy_arrow(result: *mut duckdb_arrow); } pub type duckdb_task_state = *mut ::std::os::raw::c_void; extern "C" { - #[doc = "Execute DuckDB tasks on this thread."] - #[doc = ""] - #[doc = "Will return after `max_tasks` have been executed, or if there are no more tasks present."] - #[doc = ""] - #[doc = " database: The database object to execute tasks for"] - #[doc = " max_tasks: The maximum amount of tasks to execute"] + #[doc = "Execute DuckDB tasks on this thread.\n\nWill return after `max_tasks` have been executed, or if there are no more tasks present.\n\n database: The database object to execute tasks for\n max_tasks: The maximum number of tasks to execute"] pub fn duckdb_execute_tasks(database: duckdb_database, max_tasks: idx_t); } extern "C" { - #[doc = "Creates a task state that can be used with duckdb_execute_tasks_state to execute tasks until"] - #[doc = "duckdb_finish_execution is called on the state."] - #[doc = ""] - #[doc = "duckdb_destroy_state should be called on the result in order to free memory."] - #[doc = ""] - #[doc = " database: The database object to create the task state for"] - #[doc = " returns: The task state that can be used with duckdb_execute_tasks_state."] + #[doc = "Creates a task state that can be used with duckdb_execute_tasks_state to execute tasks until\nduckdb_finish_execution is called on the state.\n\nduckdb_destroy_task_state should be called on the result in order to free memory.\n\n database: The database object to create the task state for\n returns: The task state that can be used with duckdb_execute_tasks_state."] pub fn duckdb_create_task_state(database: duckdb_database) -> duckdb_task_state; } extern "C" { - #[doc = "Execute DuckDB tasks on this thread."] - #[doc = ""] - #[doc = "The thread will keep on executing tasks forever, until duckdb_finish_execution is called on the state."] - #[doc = "Multiple threads can share the same duckdb_task_state."] - #[doc = ""] - #[doc = " state: The task state of the executor"] + #[doc = "Execute DuckDB tasks on this thread.\n\nThe thread will keep on executing tasks forever, until duckdb_finish_execution is called on the state.\nMultiple threads can share the same duckdb_task_state.\n\n state: The task state of the executor"] pub fn duckdb_execute_tasks_state(state: duckdb_task_state); }
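These task hooks let embedders lend threads to DuckDB's scheduler. A small sketch of the intended shape, assuming an open `duckdb_database` (`duckdb_finish_execution` and `duckdb_destroy_task_state` are declared just below; not part of the patch):

use libduckdb_sys::*;
use std::thread;

unsafe fn lend_a_thread(db: duckdb_database) {
    let state = duckdb_create_task_state(db);
    // Raw task states are plain pointers and not Send; pass the address instead.
    let state_addr = state as usize;
    let worker = thread::spawn(move || unsafe {
        // Runs tasks until duckdb_finish_execution is called on this state.
        duckdb_execute_tasks_state(state_addr as duckdb_task_state);
    });

    // ... run queries on other connections while the worker executes tasks ...
    duckdb_execute_tasks(db, 64); // one-shot variant: returns after at most 64 tasks

    duckdb_finish_execution(state);
    worker.join().unwrap();
    duckdb_destroy_task_state(state);
}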
extern "C" { - #[doc = "Execute DuckDB tasks on this thread."] - #[doc = ""] - #[doc = "The thread will keep on executing tasks until either duckdb_finish_execution is called on the state,"] - #[doc = "max_tasks tasks have been executed or there are no more tasks to be executed."] - #[doc = ""] - #[doc = "Multiple threads can share the same duckdb_task_state."] - #[doc = ""] - #[doc = " state: The task state of the executor"] - #[doc = " max_tasks: The maximum amount of tasks to execute"] - #[doc = " returns: The amount of tasks that have actually been executed"] + #[doc = "Execute DuckDB tasks on this thread.\n\nThe thread will keep on executing tasks until either duckdb_finish_execution is called on the state,\nmax_tasks tasks have been executed or there are no more tasks to be executed.\n\nMultiple threads can share the same duckdb_task_state.\n\n state: The task state of the executor\n max_tasks: The maximum number of tasks to execute\n returns: The number of tasks that have actually been executed"] pub fn duckdb_execute_n_tasks_state(state: duckdb_task_state, max_tasks: idx_t) -> idx_t; } extern "C" { - #[doc = "Finish execution on a specific task."] - #[doc = ""] - #[doc = " state: The task state to finish execution"] + #[doc = "Finish execution on a specific task.\n\n state: The task state to finish execution"] pub fn duckdb_finish_execution(state: duckdb_task_state); } extern "C" { - #[doc = "Check if the provided duckdb_task_state has finished execution"] - #[doc = ""] - #[doc = " state: The task state to inspect"] - #[doc = " returns: Whether or not duckdb_finish_execution has been called on the task state"] + #[doc = "Check if the provided duckdb_task_state has finished execution\n\n state: The task state to inspect\n returns: Whether or not duckdb_finish_execution has been called on the task state"] pub fn duckdb_task_state_is_finished(state: duckdb_task_state) -> bool; } extern "C" { - #[doc = "Destroys the task state returned from duckdb_create_task_state."] - #[doc = ""] - #[doc = "Note that this should not be called while there is an active duckdb_execute_tasks_state running"] - #[doc = "on the task state."] - #[doc = ""] - #[doc = " state: The task state to clean up"] + #[doc = "Destroys the task state returned from duckdb_create_task_state.\n\nNote that this should not be called while there is an active duckdb_execute_tasks_state running\non the task state.\n\n state: The task state to clean up"] pub fn duckdb_destroy_task_state(state: duckdb_task_state); } pub type __builtin_va_list = [__va_list_tag; 1usize]; diff --git a/libduckdb-sys/duckdb/duckdb.cpp b/libduckdb-sys/duckdb/duckdb.cpp index 26f282fd..587816f7 100644 --- a/libduckdb-sys/duckdb/duckdb.cpp +++ b/libduckdb-sys/duckdb/duckdb.cpp @@ -5255,6 +5255,7 @@ struct SBIterator { + //===----------------------------------------------------------------------===// // DuckDB // @@ -5342,6 +5343,10 @@ class BlockHandle { can_destroy = can_destroy_p; } + inline const idx_t &GetMemoryUsage() const { + return memory_usage; + } + private: static BufferHandle Load(shared_ptr &handle, unique_ptr buffer = nullptr); unique_ptr UnloadAndTakeBlock(); @@ -5375,7 +5380,6 @@ class BlockHandle { - namespace duckdb { class BlockManager; class DatabaseInstance; @@ -5396,10 +5400,6 @@ class BufferManager { BufferManager(DatabaseInstance &db, string temp_directory, idx_t maximum_memory); virtual ~BufferManager(); - //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or - //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so - //! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used. - shared_ptr RegisterMemory(idx_t block_size, bool can_destroy); //! Registers an in-memory buffer that cannot be unloaded until it is destroyed //! This buffer can be small (smaller than BLOCK_SIZE) //! Unpin and pin are nops on this block of memory @@ -5407,7 +5407,8 @@ class BufferManager { //! Allocate an in-memory buffer with a single pin. //! The allocated memory is released when the buffer handle is destroyed. 
- DUCKDB_API BufferHandle Allocate(idx_t block_size); + DUCKDB_API BufferHandle Allocate(idx_t block_size, bool can_destroy = true, + shared_ptr *block = nullptr); //! Reallocate an in-memory buffer that is pinned. void ReAllocate(shared_ptr &handle, idx_t block_size); @@ -5441,6 +5442,10 @@ class BufferManager { return db; } + static idx_t GetAllocSize(idx_t block_size) { + return AlignValue(block_size + Storage::BLOCK_HEADER_SIZE); + } + //! Construct a managed buffer. //! The block_id is just used for internal tracking. It doesn't map to any actual //! BlockManager. @@ -5451,6 +5456,12 @@ class BufferManager { DUCKDB_API void FreeReservedMemory(idx_t size); private: + //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or + //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so + //! it can be reloaded. The resulting buffer will already be allocated, but needs to be pinned in order to be used. + //! This needs to be private to prevent creating blocks without ever pinning them: + //! blocks that are never pinned are never added to the eviction queue + shared_ptr RegisterMemory(idx_t block_size, bool can_destroy); //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible //! (i.e. not enough blocks could be evicted) //! If the "buffer" argument is specified AND the system can find a buffer to re-use for the given allocation size @@ -5532,7 +5543,8 @@ struct RowDataBlock { RowDataBlock(BufferManager &buffer_manager, idx_t capacity, idx_t entry_size) : capacity(capacity), entry_size(entry_size), count(0), byte_offset(0) { idx_t size = MaxValue(Storage::BLOCK_SIZE, capacity * entry_size); - block = buffer_manager.RegisterMemory(size, false); + buffer_manager.Allocate(size, false, &block); + D_ASSERT(BufferManager::GetAllocSize(size) == block->GetMemoryUsage()); } explicit RowDataBlock(idx_t entry_size) : entry_size(entry_size) { } @@ -5606,17 +5618,23 @@ class RowDataCollection { count = 0; } - //! The size (in bytes) of this RowDataCollection if it were stored in a single block + //! The size (in bytes) of this RowDataCollection idx_t SizeInBytes() const { - idx_t bytes = 0; - if (entry_size == 1) { - for (auto &block : blocks) { - bytes += block->byte_offset; - } - } else { - bytes = count * entry_size; + VerifyBlockSizes(); + idx_t size = 0; + for (auto &block : blocks) { + size += block->block->GetMemoryUsage(); } - return bytes; + return size; + } + + //! 
Verifies that the block sizes are correct (Debug only) + void VerifyBlockSizes() const { +#ifdef DEBUG + for (auto &block : blocks) { + D_ASSERT(block->block->GetMemoryUsage() == BufferManager::GetAllocSize(block->capacity * entry_size)); + } +#endif } static inline idx_t EntriesPerBlock(idx_t width) { @@ -7638,7 +7656,7 @@ class DataTable { //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/bit_operations.hpp +// duckdb/common/radix.hpp // // //===----------------------------------------------------------------------===// @@ -10472,6 +10490,20 @@ unique_ptr TableCatalogEntry::AlterEntry(ClientContext &context, A } } +void TableCatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) { + D_ASSERT(!internal); + D_ASSERT(info->type == AlterType::ALTER_TABLE); + auto table_info = (AlterTableInfo *)info; + switch (table_info->alter_table_type) { + case AlterTableType::RENAME_TABLE: { + storage->info->table = this->name; + break; + default: + break; + } + } +} + static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) { if (expr.type == ExpressionType::COLUMN_REF) { auto &colref = (ColumnRefExpression &)expr; @@ -10570,6 +10602,8 @@ unique_ptr TableCatalogEntry::AddColumn(ClientContext &context, Ad create_info->constraints.push_back(constraint->Copy()); } Binder::BindLogicalType(context, info.new_column.TypeMutable(), schema->name); + info.new_column.SetOid(columns.LogicalColumnCount()); + info.new_column.SetStorageOid(columns.PhysicalColumnCount()); auto col = info.new_column.Copy(); create_info->columns.AddColumn(move(col)); @@ -11317,6 +11351,9 @@ unique_ptr CatalogEntry::AlterEntry(ClientContext &context, AlterI throw InternalException("Unsupported alter type for catalog entry!"); } +void CatalogEntry::UndoAlter(ClientContext &context, AlterInfo *info) { +} + unique_ptr CatalogEntry::Copy(ClientContext &context) { throw InternalException("Unsupported copy type for catalog entry!"); } @@ -11495,6 +11532,98 @@ class TransactionManager { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/mapping_value.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +namespace duckdb { +struct AlterInfo; + +class ClientContext; + +struct EntryIndex { + EntryIndex() : catalog(nullptr), index(DConstants::INVALID_INDEX) { + } + EntryIndex(CatalogSet &catalog, idx_t index) : catalog(&catalog), index(index) { + auto entry = catalog.entries.find(index); + if (entry == catalog.entries.end()) { + throw InternalException("EntryIndex - Catalog entry not found in constructor!?"); + } + catalog.entries[index].reference_count++; + } + ~EntryIndex() { + if (!catalog) { + return; + } + auto entry = catalog->entries.find(index); + D_ASSERT(entry != catalog->entries.end()); + auto remaining_ref = --entry->second.reference_count; + if (remaining_ref == 0) { + catalog->entries.erase(index); + } + catalog = nullptr; + } + // disable copy constructors + EntryIndex(const EntryIndex &other) = delete; + EntryIndex &operator=(const EntryIndex &) = delete; + //! 
enable move constructors + EntryIndex(EntryIndex &&other) noexcept { + catalog = nullptr; + index = DConstants::INVALID_INDEX; + std::swap(catalog, other.catalog); + std::swap(index, other.index); + } + EntryIndex &operator=(EntryIndex &&other) noexcept { + std::swap(catalog, other.catalog); + std::swap(index, other.index); + return *this; + } + + unique_ptr &GetEntry() { + auto entry = catalog->entries.find(index); + if (entry == catalog->entries.end()) { + throw InternalException("EntryIndex - Catalog entry not found!?"); + } + return entry->second.entry; + } + idx_t GetIndex() { + return index; + } + EntryIndex Copy() { + if (catalog) { + return EntryIndex(*catalog, index); + } else { + return EntryIndex(); + } + } + +private: + CatalogSet *catalog; + idx_t index; +}; + +struct MappingValue { + explicit MappingValue(EntryIndex index_p) : index(move(index_p)), timestamp(0), deleted(false), parent(nullptr) { + } + + EntryIndex index; + transaction_t timestamp; + bool deleted; + unique_ptr child; + MappingValue *parent; +}; + +} // namespace duckdb + namespace duckdb { @@ -11508,27 +11637,44 @@ namespace duckdb { class EntryDropper { public: //! Both constructor and destructor are privates because they should only be called by DropEntryDependencies - explicit EntryDropper(CatalogSet &catalog_set, idx_t entry_index) - : catalog_set(catalog_set), entry_index(entry_index) { - old_deleted = catalog_set.entries[entry_index].get()->deleted; + explicit EntryDropper(EntryIndex &entry_index_p) : entry_index(entry_index_p) { + old_deleted = entry_index.GetEntry()->deleted; } ~EntryDropper() { - catalog_set.entries[entry_index].get()->deleted = old_deleted; + entry_index.GetEntry()->deleted = old_deleted; } private: - //! The current catalog_set - CatalogSet &catalog_set; //! Keeps track of the state of the entry before starting the delete bool old_deleted; //! 
Index of entry to be deleted - idx_t entry_index; + EntryIndex &entry_index; }; CatalogSet::CatalogSet(Catalog &catalog, unique_ptr defaults) : catalog(catalog), defaults(move(defaults)) { } +CatalogSet::~CatalogSet() { +} + +EntryIndex CatalogSet::PutEntry(idx_t entry_index, unique_ptr entry) { + if (entries.find(entry_index) != entries.end()) { + throw InternalException("Entry with entry index \"%llu\" already exists", entry_index); + } + entries.insert(make_pair(entry_index, EntryValue(move(entry)))); + return EntryIndex(*this, entry_index); +} + +void CatalogSet::PutEntry(EntryIndex index, unique_ptr catalog_entry) { + auto entry = entries.find(index.GetIndex()); + if (entry == entries.end()) { + throw InternalException("Entry with entry index \"%llu\" does not exist", index.GetIndex()); + } + catalog_entry->child = move(entry->second.entry); + catalog_entry->child->parent = catalog_entry.get(); + entry->second.entry = move(catalog_entry); +} bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ptr value, unordered_set &dependencies) { @@ -11539,7 +11685,7 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ unique_lock read_lock(catalog_lock); // first check if the entry exists in the unordered set - idx_t entry_index; + idx_t index; auto mapping_value = GetMapping(context, name); if (mapping_value == nullptr || mapping_value->deleted) { // if it does not: entry has never been created @@ -11553,17 +11699,17 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ // first create a dummy deleted entry for this entry // so transactions started before the commit of this transaction don't // see it yet - entry_index = current_entry++; auto dummy_node = make_unique(CatalogType::INVALID, value->catalog, name); dummy_node->timestamp = 0; dummy_node->deleted = true; dummy_node->set = this; - entries[entry_index] = move(dummy_node); - PutMapping(context, name, entry_index); + auto entry_index = PutEntry(current_entry++, move(dummy_node)); + index = entry_index.GetIndex(); + PutMapping(context, name, move(entry_index)); } else { - entry_index = mapping_value->index; - auto ¤t = *entries[entry_index]; + index = mapping_value->index.GetIndex(); + auto ¤t = *mapping_value->index.GetEntry(); // if it does, we have to check version numbers if (HasConflict(context, current.timestamp)) { // current version has been written to by a currently active @@ -11585,16 +11731,16 @@ bool CatalogSet::CreateEntry(ClientContext &context, const string &name, unique_ // now add the dependency set of this object to the dependency manager catalog.dependency_manager->AddObject(context, value.get(), dependencies); - value->child = move(entries[entry_index]); - value->child->parent = value.get(); + auto value_ptr = value.get(); + EntryIndex entry_index(*this, index); + PutEntry(move(entry_index), move(value)); // push the old entry in the undo buffer for this transaction - transaction.PushCatalogEntry(value->child.get()); - entries[entry_index] = move(value); + transaction.PushCatalogEntry(value_ptr->child.get()); return true; } -bool CatalogSet::GetEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry *&catalog_entry) { - catalog_entry = entries[entry_index].get(); +bool CatalogSet::GetEntryInternal(ClientContext &context, EntryIndex &entry_index, CatalogEntry *&catalog_entry) { + catalog_entry = entry_index.GetEntry().get(); // if it does: we have to retrieve the entry and to check version numbers if 
(HasConflict(context, catalog_entry->timestamp)) { // current version has been written to by a currently active @@ -11610,21 +11756,22 @@ bool CatalogSet::GetEntryInternal(ClientContext &context, idx_t entry_index, Cat return true; } -bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name, idx_t &entry_index, +bool CatalogSet::GetEntryInternal(ClientContext &context, const string &name, EntryIndex *entry_index, CatalogEntry *&catalog_entry) { auto mapping_value = GetMapping(context, name); if (mapping_value == nullptr || mapping_value->deleted) { // the entry does not exist, check if we can create a default entry return false; } - entry_index = mapping_value->index; - return GetEntryInternal(context, entry_index, catalog_entry); + if (entry_index) { + *entry_index = mapping_value->index.Copy(); + } + return GetEntryInternal(context, mapping_value->index, catalog_entry); } bool CatalogSet::AlterOwnership(ClientContext &context, ChangeOwnershipInfo *info) { - idx_t entry_index; CatalogEntry *entry; - if (!GetEntryInternal(context, info->name, entry_index, entry)) { + if (!GetEntryInternal(context, info->name, nullptr, entry)) { return false; } @@ -11644,9 +11791,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf lock_guard write_lock(catalog.write_lock); // first check if the entry exists in the unordered set - idx_t entry_index; + EntryIndex entry_index; CatalogEntry *entry; - if (!GetEntryInternal(context, name, entry_index, entry)) { + if (!GetEntryInternal(context, name, &entry_index, entry)) { return false; } if (entry->internal) { @@ -11669,8 +11816,9 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf if (value->name != original_name) { auto mapping_value = GetMapping(context, value->name); if (mapping_value && !mapping_value->deleted) { - auto entry = GetEntryForTransaction(context, entries[mapping_value->index].get()); - if (!entry->deleted) { + auto original_entry = GetEntryForTransaction(context, mapping_value->index.GetEntry().get()); + if (!original_entry->deleted) { + entry->UndoAlter(context, alter_info); string rename_err_msg = "Could not rename \"%s\" to \"%s\": another entry with this name already exists!"; throw CatalogException(rename_err_msg, original_name, value->name); @@ -11680,25 +11828,22 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf if (value->name != original_name) { // Do PutMapping and DeleteMapping after dependency check - PutMapping(context, value->name, entry_index); + PutMapping(context, value->name, entry_index.Copy()); DeleteMapping(context, original_name); } value->timestamp = transaction.transaction_id; - value->child = move(entries[entry_index]); - value->child->parent = value.get(); value->set = this; + auto new_entry = value.get(); + PutEntry(move(entry_index), move(value)); // serialize the AlterInfo into a temporary buffer BufferedSerializer serializer; alter_info->Serialize(serializer); BinaryData serialized_alter = serializer.GetData(); - auto new_entry = value.get(); - // push the old entry in the undo buffer for this transaction - transaction.PushCatalogEntry(value->child.get(), serialized_alter.data.get(), serialized_alter.size); - entries[entry_index] = move(value); + transaction.PushCatalogEntry(new_entry->child.get(), serialized_alter.data.get(), serialized_alter.size); // Check the dependency manager to verify that there are no conflicting dependencies with this alter // Note that we do this AFTER the new entry has 
been entirely set up in the catalog set @@ -11709,13 +11854,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf return true; } -void CatalogSet::DropEntryDependencies(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade) { - +void CatalogSet::DropEntryDependencies(ClientContext &context, EntryIndex &entry_index, CatalogEntry &entry, + bool cascade) { // Stores the deleted value of the entry before starting the process - EntryDropper dropper(*this, entry_index); + EntryDropper dropper(entry_index); // To correctly delete the object and its dependencies, it temporarily is set to deleted. - entries[entry_index].get()->deleted = true; + entry_index.GetEntry()->deleted = true; // check any dependencies of this object entry.catalog->dependency_manager->DropObject(context, &entry, cascade); @@ -11725,7 +11870,7 @@ void CatalogSet::DropEntryDependencies(ClientContext &context, idx_t entry_index // dropper.~EntryDropper() } -void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade) { +void CatalogSet::DropEntryInternal(ClientContext &context, EntryIndex entry_index, CatalogEntry &entry, bool cascade) { auto &transaction = Transaction::GetTransaction(context); DropEntryDependencies(context, entry_index, entry, cascade); @@ -11735,31 +11880,30 @@ void CatalogSet::DropEntryInternal(ClientContext &context, idx_t entry_index, Ca // and point it at the dummy node auto value = make_unique(CatalogType::DELETED_ENTRY, entry.catalog, entry.name); value->timestamp = transaction.transaction_id; - value->child = move(entries[entry_index]); - value->child->parent = value.get(); value->set = this; value->deleted = true; + auto value_ptr = value.get(); + PutEntry(move(entry_index), move(value)); // push the old entry in the undo buffer for this transaction - transaction.PushCatalogEntry(value->child.get()); - - entries[entry_index] = move(value); + transaction.PushCatalogEntry(value_ptr->child.get()); } bool CatalogSet::DropEntry(ClientContext &context, const string &name, bool cascade) { // lock the catalog for writing lock_guard write_lock(catalog.write_lock); // we can only delete an entry that exists - idx_t entry_index; + EntryIndex entry_index; CatalogEntry *entry; - if (!GetEntryInternal(context, name, entry_index, entry)) { + if (!GetEntryInternal(context, name, &entry_index, entry)) { return false; } if (entry->internal) { throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name); } - DropEntryInternal(context, entry_index, *entry, cascade); + lock_guard read_lock(catalog_lock); + DropEntryInternal(context, move(entry_index), *entry, cascade); return true; } @@ -11777,12 +11921,10 @@ void CatalogSet::CleanupEntry(CatalogEntry *catalog_entry) { if (parent->deleted && !parent->child && !parent->parent) { auto mapping_entry = mapping.find(parent->name); D_ASSERT(mapping_entry != mapping.end()); - auto index = mapping_entry->second->index; - auto entry = entries.find(index); - D_ASSERT(entry != entries.end()); - if (entry->second.get() == parent) { + auto entry = mapping_entry->second->index.GetEntry().get(); + D_ASSERT(entry); + if (entry == parent) { mapping.erase(mapping_entry); - entries.erase(entry); } } } @@ -11816,9 +11958,9 @@ MappingValue *CatalogSet::GetMapping(ClientContext &context, const string &name, return mapping_value; } -void CatalogSet::PutMapping(ClientContext &context, const string &name, idx_t entry_index) { +void 
CatalogSet::PutMapping(ClientContext &context, const string &name, EntryIndex entry_index) { auto entry = mapping.find(name); - auto new_value = make_unique(entry_index); + auto new_value = make_unique(move(entry_index)); new_value->timestamp = Transaction::GetTransaction(context).transaction_id; if (entry != mapping.end()) { if (HasConflict(context, entry->second->timestamp)) { @@ -11833,7 +11975,7 @@ void CatalogSet::PutMapping(ClientContext &context, const string &name, idx_t en void CatalogSet::DeleteMapping(ClientContext &context, const string &name) { auto entry = mapping.find(name); D_ASSERT(entry != mapping.end()); - auto delete_marker = make_unique(entry->second->index); + auto delete_marker = make_unique(entry->second->index.Copy()); delete_marker->deleted = true; delete_marker->timestamp = Transaction::GetTransaction(context).transaction_id; delete_marker->child = move(entry->second); @@ -11901,15 +12043,14 @@ CatalogEntry *CatalogSet::CreateEntryInternal(ClientContext &context, unique_ptr return nullptr; } auto &name = entry->name; - auto entry_index = current_entry++; auto catalog_entry = entry.get(); entry->set = this; entry->timestamp = 0; - PutMapping(context, name, entry_index); + auto entry_index = PutEntry(current_entry++, move(entry)); + PutMapping(context, name, move(entry_index)); mapping[name]->timestamp = 0; - entries[entry_index] = move(entry); return catalog_entry; } @@ -11948,7 +12089,7 @@ CatalogEntry *CatalogSet::GetEntry(ClientContext &context, const string &name) { // we found an entry for this name // check the version numbers - auto catalog_entry = entries[mapping_value->index].get(); + auto catalog_entry = mapping_value->index.GetEntry().get(); CatalogEntry *current = GetEntryForTransaction(context, catalog_entry); if (current->deleted || (current->name != name && !UseTimestamp(context, mapping_value->timestamp))) { return nullptr; @@ -12057,7 +12198,7 @@ void CatalogSet::Undo(CatalogEntry *entry) { // otherwise we need to update the base entry tables auto &name = entry->name; to_be_removed_node->child->SetAsRoot(); - entries[mapping[name]->index] = move(to_be_removed_node->child); + mapping[name]->index.GetEntry() = move(to_be_removed_node->child); entry->parent = nullptr; } @@ -12072,7 +12213,7 @@ void CatalogSet::Undo(CatalogEntry *entry) { } } // we mark the catalog as being modified, since this action can lead to e.g. 
tables being dropped - entry->catalog->ModifyCatalog(); + catalog.ModifyCatalog(); } void CatalogSet::CreateDefaultEntries(ClientContext &context, unique_lock &lock) { @@ -12105,7 +12246,7 @@ void CatalogSet::Scan(ClientContext &context, const std::functiondeleted) { callback(entry); @@ -12117,7 +12258,7 @@ void CatalogSet::Scan(const std::function &callback) { // lock the catalog set lock_guard lock(catalog_lock); for (auto &kv : entries) { - auto entry = kv.second.get(); + auto entry = kv.second.entry.get(); entry = GetCommittedEntry(entry); if (!entry->deleted) { callback(entry); @@ -12645,14 +12786,17 @@ static DefaultView internal_views[] = { {"pg_catalog", "pg_attrdef", "SELECT column_index oid, table_oid adrelid, column_index adnum, column_default adbin from duckdb_columns() where column_default is not null;"}, {"pg_catalog", "pg_class", "SELECT table_oid oid, table_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, estimated_size::real reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, index_count > 0 relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'r' relkind, column_count relnatts, check_constraint_count relchecks, false relhasoids, has_primary_key relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_tables() UNION ALL SELECT view_oid oid, view_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'v' relkind, column_count relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_views() UNION ALL SELECT sequence_oid oid, sequence_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, case when temporary then 't' else 'p' end relpersistence, 'S' relkind, 0 relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM duckdb_sequences() UNION ALL SELECT index_oid oid, index_name relname, schema_oid relnamespace, 0 reltype, 0 reloftype, 0 relowner, 0 relam, 0 relfilenode, 0 reltablespace, 0 relpages, 0 reltuples, 0 relallvisible, 0 reltoastrelid, 0 reltoastidxid, false relhasindex, false relisshared, 't' relpersistence, 'i' relkind, NULL relnatts, 0 relchecks, false relhasoids, false relhaspkey, false relhasrules, false relhastriggers, false relhassubclass, false relrowsecurity, true relispopulated, NULL relreplident, false relispartition, 0 relrewrite, 0 relfrozenxid, NULL relminmxid, NULL relacl, NULL reloptions, NULL relpartbound FROM 
duckdb_indexes()"}, {"pg_catalog", "pg_constraint", "SELECT table_oid*1000000+constraint_index oid, constraint_text conname, schema_oid connamespace, CASE constraint_type WHEN 'CHECK' then 'c' WHEN 'UNIQUE' then 'u' WHEN 'PRIMARY KEY' THEN 'p' WHEN 'FOREIGN KEY' THEN 'f' ELSE 'x' END contype, false condeferrable, false condeferred, true convalidated, table_oid conrelid, 0 contypid, 0 conindid, 0 conparentid, 0 confrelid, NULL confupdtype, NULL confdeltype, NULL confmatchtype, true conislocal, 0 coninhcount, false connoinherit, constraint_column_indexes conkey, NULL confkey, NULL conpfeqop, NULL conppeqop, NULL conffeqop, NULL conexclop, expression conbin FROM duckdb_constraints()"}, + {"pg_catalog", "pg_database", "SELECT 0 oid, 'main' datname"}, {"pg_catalog", "pg_depend", "SELECT * FROM duckdb_dependencies()"}, {"pg_catalog", "pg_description", "SELECT NULL objoid, NULL classoid, NULL objsubid, NULL description WHERE 1=0"}, {"pg_catalog", "pg_enum", "SELECT NULL oid, NULL enumtypid, NULL enumsortorder, NULL enumlabel WHERE 1=0"}, {"pg_catalog", "pg_index", "SELECT index_oid indexrelid, table_oid indrelid, 0 indnatts, 0 indnkeyatts, is_unique indisunique, is_primary indisprimary, false indisexclusion, true indimmediate, false indisclustered, true indisvalid, false indcheckxmin, true indisready, true indislive, false indisreplident, NULL::INT[] indkey, NULL::OID[] indcollation, NULL::OID[] indclass, NULL::INT[] indoption, expressions indexprs, NULL indpred FROM duckdb_indexes()"}, {"pg_catalog", "pg_indexes", "SELECT schema_name schemaname, table_name tablename, index_name indexname, NULL \"tablespace\", sql indexdef FROM duckdb_indexes()"}, {"pg_catalog", "pg_namespace", "SELECT oid, schema_name nspname, 0 nspowner, NULL nspacl FROM duckdb_schemas()"}, + {"pg_catalog", "pg_proc", "SELECT f.function_oid oid, function_name proname, s.oid pronamespace FROM duckdb_functions() f LEFT JOIN duckdb_schemas() s USING (schema_name)"}, {"pg_catalog", "pg_sequence", "SELECT sequence_oid seqrelid, 0 seqtypid, start_value seqstart, increment_by seqincrement, max_value seqmax, min_value seqmin, 0 seqcache, cycle seqcycle FROM duckdb_sequences()"}, {"pg_catalog", "pg_sequences", "SELECT schema_name schemaname, sequence_name sequencename, 'duckdb' sequenceowner, 0 data_type, start_value, min_value, max_value, increment_by, cycle, 0 cache_size, last_value FROM duckdb_sequences()"}, + {"pg_catalog", "pg_settings", "SELECT name, value setting, description short_desc, CASE WHEN input_type = 'VARCHAR' THEN 'string' WHEN input_type = 'BOOLEAN' THEN 'bool' WHEN input_type IN ('BIGINT', 'UBIGINT') THEN 'integer' ELSE input_type END vartype FROM duckdb_settings()"}, {"pg_catalog", "pg_tables", "SELECT schema_name schemaname, table_name tablename, 'duckdb' tableowner, NULL \"tablespace\", index_count > 0 hasindexes, false hasrules, false hastriggers FROM duckdb_tables()"}, {"pg_catalog", "pg_tablespace", "SELECT 0 oid, 'pg_default' spcname, 0 spcowner, NULL spcacl, NULL spcoptions"}, {"pg_catalog", "pg_type", "SELECT type_oid oid, format_pg_type(type_name) typname, schema_oid typnamespace, 0 typowner, type_size typlen, false typbyval, 'b' typtype, CASE WHEN type_category='NUMERIC' THEN 'N' WHEN type_category='STRING' THEN 'S' WHEN type_category='DATETIME' THEN 'D' WHEN type_category='BOOLEAN' THEN 'B' WHEN type_category='COMPOSITE' THEN 'C' WHEN type_category='USER' THEN 'U' ELSE 'X' END typcategory, false typispreferred, true typisdefined, NULL typdelim, NULL typrelid, NULL typsubscript, NULL typelem, NULL 
typarray, NULL typinput, NULL typoutput, NULL typreceive, NULL typsend, NULL typmodin, NULL typmodout, NULL typanalyze, 'd' typalign, 'p' typstorage, NULL typnotnull, NULL typbasetype, NULL typtypmod, NULL typndims, NULL typcollation, NULL typdefaultbin, NULL typdefault, NULL typacl FROM duckdb_types();"}, @@ -12719,6 +12863,7 @@ vector DefaultViewGenerator::GetDefaultEntries() { + namespace duckdb { DependencyManager::DependencyManager(Catalog &catalog) : catalog(catalog) { @@ -12728,12 +12873,11 @@ void DependencyManager::AddObject(ClientContext &context, CatalogEntry *object, unordered_set &dependencies) { // check for each object in the sources if they were not deleted yet for (auto &dependency : dependencies) { - idx_t entry_index; CatalogEntry *catalog_entry; if (!dependency->set) { throw InternalException("Dependency has no set"); } - if (!dependency->set->GetEntryInternal(context, dependency->name, entry_index, catalog_entry)) { + if (!dependency->set->GetEntryInternal(context, dependency->name, nullptr, catalog_entry)) { throw InternalException("Dependency has already been deleted?"); } } @@ -12761,10 +12905,9 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object, if (mapping_value == nullptr) { continue; } - idx_t entry_index = mapping_value->index; CatalogEntry *dependency_entry; - if (!catalog_set.GetEntryInternal(context, entry_index, dependency_entry)) { + if (!catalog_set.GetEntryInternal(context, mapping_value->index, dependency_entry)) { // the dependent object was already deleted, no conflict continue; } @@ -12772,7 +12915,7 @@ void DependencyManager::DropObject(ClientContext &context, CatalogEntry *object, if (cascade || dep.dependency_type == DependencyType::DEPENDENCY_AUTOMATIC || dep.dependency_type == DependencyType::DEPENDENCY_OWNS) { // cascade: drop the dependent object - catalog_set.DropEntryInternal(context, entry_index, *dependency_entry, cascade); + catalog_set.DropEntryInternal(context, mapping_value->index.Copy(), *dependency_entry, cascade); } else { // no cascade and there are objects that depend on this object: throw error throw DependencyException("Cannot drop entry \"%s\" because there are entries that " @@ -12792,9 +12935,8 @@ void DependencyManager::AlterObject(ClientContext &context, CatalogEntry *old_ob for (auto &dep : dependent_objects) { // look up the entry in the catalog set auto &catalog_set = *dep.entry->set; - idx_t entry_index; CatalogEntry *dependency_entry; - if (!catalog_set.GetEntryInternal(context, dep.entry->name, entry_index, dependency_entry)) { + if (!catalog_set.GetEntryInternal(context, dep.entry->name, nullptr, dependency_entry)) { // the dependent object was already deleted, no conflict continue; } @@ -12956,6 +13098,9 @@ AllocatedData::AllocatedData() : allocator(nullptr), pointer(nullptr), allocated AllocatedData::AllocatedData(Allocator &allocator, data_ptr_t pointer, idx_t allocated_size) : allocator(&allocator), pointer(pointer), allocated_size(allocated_size) { + if (!pointer) { + throw InternalException("AllocatedData object constructed with nullptr"); + } } AllocatedData::~AllocatedData() { Reset(); @@ -13047,11 +13192,19 @@ Allocator::~Allocator() { data_ptr_t Allocator::AllocateData(idx_t size) { D_ASSERT(size > 0); + if (size >= MAXIMUM_ALLOC_SIZE) { + D_ASSERT(false); + throw InternalException("Requested allocation size of %llu is out of range - maximum allocation size is %llu", + size, MAXIMUM_ALLOC_SIZE); + } auto result = allocate_function(private_data.get(), size); #ifdef DEBUG 
D_ASSERT(private_data); private_data->debug_info->AllocateData(result, size); #endif + if (!result) { + throw std::bad_alloc(); + } return result; } @@ -13071,11 +13224,20 @@ data_ptr_t Allocator::ReallocateData(data_ptr_t pointer, idx_t old_size, idx_t s if (!pointer) { return nullptr; } + if (size >= MAXIMUM_ALLOC_SIZE) { + D_ASSERT(false); + throw InternalException( + "Requested re-allocation size of %llu is out of range - maximum allocation size is %llu", size, + MAXIMUM_ALLOC_SIZE); + } auto new_pointer = reallocate_function(private_data.get(), pointer, old_size, size); #ifdef DEBUG D_ASSERT(private_data); private_data->debug_info->ReallocateData(pointer, new_pointer, old_size, size); #endif + if (!new_pointer) { + throw std::bad_alloc(); + } return new_pointer; } @@ -15690,6 +15852,13 @@ void BoxRenderer::Render(ClientContext &context, const vector &names, co // figure out how many/which rows to render idx_t row_count = result.Count(); idx_t rows_to_render = MinValue(row_count, config.max_rows); + if (row_count <= config.max_rows + 3) { + // hiding rows adds 3 extra rows + // so hiding rows makes no sense if we are only slightly over the limit + // if we are 1 row over the limit hiding rows will actually increase the number of lines we display! + // in this case render all the rows + rows_to_render = row_count; + } idx_t top_rows; idx_t bottom_rows; if (rows_to_render == row_count) { @@ -25167,6 +25336,7 @@ FileType FileHandle::GetType() { + //===----------------------------------------------------------------------===// // DuckDB // @@ -30817,6 +30987,7 @@ unique_ptr FileSystem::CreateLocal() { + namespace duckdb { struct ConvertToString { @@ -32439,8 +32610,11 @@ string_t StringCastFromDecimal::Operation(hugeint_t input, uint8_t width, uint8_ + namespace duckdb { +struct interval_t; + struct MultiplyOperator { template static inline TR Operation(TA left, TB right) { @@ -38778,7 +38952,7 @@ class ColumnDataAllocator { private: void AllocateEmptyBlock(idx_t size); - void AllocateBlock(); + BufferHandle AllocateBlock(); BufferHandle Pin(uint32_t block_id); BufferHandle PinInternal(uint32_t block_id); @@ -38892,11 +39066,7 @@ class PartitionedColumnData { return make_unique(allocators->allocators[partition_index], types); } //! 
Create a DataChunk used for buffering appends to the partition - unique_ptr CreatePartitionBuffer() const { - auto result = make_unique(); - result->Initialize(Allocator::Get(context), types, BufferSize()); - return result; - } + unique_ptr CreatePartitionBuffer() const; protected: PartitionedColumnDataType type; @@ -39273,6 +39443,9 @@ struct PartitionFunctor { const auto row_width = layout.GetRowWidth(); const auto has_heap = !layout.AllConstant(); + block_collection.VerifyBlockSizes(); + string_heap.VerifyBlockSizes(); + // Fixed-size data RowDataBlock *partition_data_blocks[CONSTANTS::NUM_PARTITIONS]; vector partition_data_handles; @@ -39407,6 +39580,10 @@ struct PartitionFunctor { #ifdef DEBUG for (idx_t bin = 0; bin < CONSTANTS::NUM_PARTITIONS; bin++) { auto &p_block_collection = *partition_block_collections[bin]; + p_block_collection.VerifyBlockSizes(); + if (!layout.AllConstant()) { + partition_string_heaps[bin]->VerifyBlockSizes(); + } idx_t p_count = 0; for (idx_t b = 0; b < p_block_collection.blocks.size(); b++) { auto &data_block = *p_block_collection.blocks[b]; @@ -47316,14 +47493,13 @@ static void SortTiedBlobs(BufferManager &buffer_manager, const data_ptr_t datapt return order * Comparators::CompareVal(left_ptr, right_ptr, logical_type) < 0; }); // Re-order - auto temp_block = - buffer_manager.Allocate(MaxValue((end - start) * sort_layout.entry_size, (idx_t)Storage::BLOCK_SIZE)); - data_ptr_t temp_ptr = temp_block.Ptr(); + auto temp_block = buffer_manager.GetBufferAllocator().Allocate((end - start) * sort_layout.entry_size); + data_ptr_t temp_ptr = temp_block.get(); for (idx_t i = 0; i < end - start; i++) { FastMemcpy(temp_ptr, entry_ptrs[i], sort_layout.entry_size); temp_ptr += sort_layout.entry_size; } - memcpy(dataptr + start * sort_layout.entry_size, temp_block.Ptr(), (end - start) * sort_layout.entry_size); + memcpy(dataptr + start * sort_layout.entry_size, temp_block.get(), (end - start) * sort_layout.entry_size); // Determine if there are still ties (if this is not the last column) if (tie_col < sort_layout.column_count - 1) { data_ptr_t idx_ptr = dataptr + start * sort_layout.entry_size + sort_layout.comparison_size; @@ -47388,7 +47564,7 @@ static void ComputeTies(data_ptr_t dataptr, const idx_t &count, const idx_t &col //! Textbook LSD radix sort void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, const idx_t &count, const idx_t &col_offset, const idx_t &row_width, const idx_t &sorting_size) { - auto temp_block = buffer_manager.Allocate(MaxValue(count * row_width, (idx_t)Storage::BLOCK_SIZE)); + auto temp_block = buffer_manager.GetBufferAllocator().Allocate(count * row_width); bool swap = false; idx_t counts[SortConstants::VALUES_PER_RADIX]; @@ -47396,8 +47572,8 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons // Init counts to 0 memset(counts, 0, sizeof(counts)); // Const some values for convenience - const data_ptr_t source_ptr = swap ? temp_block.Ptr() : dataptr; - const data_ptr_t target_ptr = swap ? dataptr : temp_block.Ptr(); + const data_ptr_t source_ptr = swap ? temp_block.get() : dataptr; + const data_ptr_t target_ptr = swap ? 
dataptr : temp_block.get(); const idx_t offset = col_offset + sorting_size - r; // Collect counts data_ptr_t offset_ptr = source_ptr + offset; @@ -47425,7 +47601,7 @@ void RadixSortLSD(BufferManager &buffer_manager, const data_ptr_t &dataptr, cons } // Move data back to original buffer (if it was swapped) if (swap) { - memcpy(dataptr, temp_block.Ptr(), count * row_width); + memcpy(dataptr, temp_block.get(), count * row_width); } } @@ -47773,6 +47949,9 @@ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const { } LocalSortState::LocalSortState() : initialized(false) { + if (!Radix::IsLittleEndian()) { + throw NotImplementedException("Sorting is not supported on big endian architectures"); + } } void LocalSortState::Initialize(GlobalSortState &global_sort_state, BufferManager &buffer_manager_p) { @@ -51592,13 +51771,14 @@ BufferHandle ColumnDataAllocator::PinInternal(uint32_t block_id) { return alloc.buffer_manager->Pin(blocks[block_id].handle); } -void ColumnDataAllocator::AllocateBlock() { +BufferHandle ColumnDataAllocator::AllocateBlock() { D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR); BlockMetaData data; data.size = 0; data.capacity = Storage::BLOCK_SIZE; - data.handle = alloc.buffer_manager->RegisterMemory(Storage::BLOCK_SIZE, false); + auto pin = alloc.buffer_manager->Allocate(Storage::BLOCK_SIZE, false, &data.handle); blocks.push_back(move(data)); + return pin; } void ColumnDataAllocator::AllocateEmptyBlock(idx_t size) { @@ -51632,11 +51812,10 @@ void ColumnDataAllocator::AllocateBuffer(idx_t size, uint32_t &block_id, uint32_ ChunkManagementState *chunk_state) { D_ASSERT(allocated_data.empty()); if (blocks.empty() || blocks.back().Capacity() < size) { - AllocateBlock(); - if (chunk_state && !blocks.empty()) { - auto &last_block = blocks.back(); + auto pinned_block = AllocateBlock(); + if (chunk_state) { + D_ASSERT(!blocks.empty()); auto new_block_id = blocks.size() - 1; - auto pinned_block = alloc.buffer_manager->Pin(last_block.handle); chunk_state->handles[new_block_id] = move(pinned_block); } } @@ -52581,7 +52760,7 @@ namespace duckdb { ColumnDataCollectionSegment::ColumnDataCollectionSegment(shared_ptr allocator_p, vector types_p) - : allocator(move(allocator_p)), types(move(types_p)), count(0) { + : allocator(move(allocator_p)), types(move(types_p)), count(0), heap(allocator->GetAllocator()) { } idx_t ColumnDataCollectionSegment::GetDataSize(idx_t type_size) { @@ -53961,6 +54140,7 @@ string Decimal::ToString(hugeint_t value, uint8_t width, uint8_t scale) { + #include #include @@ -54101,6 +54281,7 @@ hash_t Hash(uint8_t *val, size_t size) { + #include #include @@ -54595,6 +54776,13 @@ bool Hugeint::TryConvert(int8_t value, hugeint_t &result) { return true; } +template <> +bool Hugeint::TryConvert(const char *value, hugeint_t &result) { + auto len = strlen(value); + string_t string_val(value, len); + return TryCast::Operation(string_val, result, true); +} + template <> bool Hugeint::TryConvert(int16_t value, hugeint_t &result) { result = HugeintConvertInteger(value); @@ -55194,6 +55382,7 @@ DUCKDB_API DatePartSpecifier GetDatePartSpecifier(const string &specifier); + namespace duckdb { struct AddOperator { @@ -55321,8 +55510,14 @@ dtime_t AddTimeOperator::Operation(interval_t left, dtime_t right); + namespace duckdb { +struct interval_t; +struct date_t; +struct timestamp_t; +struct dtime_t; + struct SubtractOperator { template static inline TR Operation(TA left, TB right) { @@ -55963,6 +56158,12 @@ void 
PartitionedColumnData::InitializeAppendState(PartitionedColumnDataAppendSta InitializeAppendStateInternal(state); } +unique_ptr PartitionedColumnData::CreatePartitionBuffer() const { + auto result = make_unique(); + result->Initialize(BufferManager::GetBufferManager(context).GetBufferAllocator(), types, BufferSize()); + return result; +} + void PartitionedColumnData::Append(PartitionedColumnDataAppendState &state, DataChunk &input) { // Compute partition indices and store them in state.partition_indices ComputePartitionIndices(state, input); @@ -56649,7 +56850,7 @@ buffer_ptr SelectionVector::Slice(const SelectionVector &sel, idx namespace duckdb { -StringHeap::StringHeap() : allocator(Allocator::DefaultAllocator()) { +StringHeap::StringHeap(Allocator &allocator) : allocator(allocator) { } void StringHeap::Destroy() { @@ -65709,7 +65910,9 @@ static inline void ListLoopHash(Vector &input, Vector &hashes, const SelectionVe const auto child_count = ListVector::GetListSize(input); Vector child_hashes(LogicalType::HASH, child_count); - VectorOperations::Hash(child, child_hashes, child_count); + if (child_count > 0) { + VectorOperations::Hash(child, child_hashes, child_count); + } auto chdata = FlatVector::GetData(child_hashes); // Reduce the number of entries to check to the non-empty ones @@ -67109,11 +67312,13 @@ class ColumnBindingResolver : public LogicalOperatorVisitor { ColumnBindingResolver(); void VisitOperator(LogicalOperator &op) override; + static void Verify(LogicalOperator &op); protected: vector bindings; unique_ptr VisitReplace(BoundColumnRefExpression &expr, unique_ptr *expr_ptr) override; + static unordered_set VerifyInternal(LogicalOperator &op); }; } // namespace duckdb @@ -67455,6 +67660,35 @@ unique_ptr ColumnBindingResolver::VisitReplace(BoundColumnRefExpress // LCOV_EXCL_STOP } +unordered_set ColumnBindingResolver::VerifyInternal(LogicalOperator &op) { + unordered_set result; + for (auto &child : op.children) { + auto child_indexes = VerifyInternal(*child); + for (auto index : child_indexes) { + D_ASSERT(index != DConstants::INVALID_INDEX); + if (result.find(index) != result.end()) { + throw InternalException("Duplicate table index \"%lld\" found", index); + } + result.insert(index); + } + } + auto indexes = op.GetTableIndex(); + for (auto index : indexes) { + D_ASSERT(index != DConstants::INVALID_INDEX); + if (result.find(index) != result.end()) { + throw InternalException("Duplicate table index \"%lld\" found", index); + } + result.insert(index); + } + return result; +} + +void ColumnBindingResolver::Verify(LogicalOperator &op) { +#ifdef DEBUG + VerifyInternal(op); +#endif +} + } // namespace duckdb @@ -68985,6 +69219,9 @@ ART::ART(const vector &column_ids, TableIOManager &table_io_manager, DatabaseInstance &db, idx_t block_id, idx_t block_offset) : Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db), estimated_art_size(0), estimated_key_size(16) { + if (!Radix::IsLittleEndian()) { + throw NotImplementedException("ART indexes are not supported on big endian architectures"); + } if (block_id != DConstants::INVALID_INDEX) { tree = Node::Deserialize(*this, block_id, block_offset); } else { @@ -69268,7 +69505,7 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator auto payload_types = logical_types; payload_types.emplace_back(LogicalType::ROW_TYPE); - ArenaAllocator arena_allocator(allocator); + ArenaAllocator arena_allocator(BufferAllocator::Get(db)); vector keys(STANDARD_VECTOR_SIZE); auto temp_art 
= make_unique(this->column_ids, this->table_io_manager, this->unbound_expressions, @@ -69325,7 +69562,7 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) { D_ASSERT(logical_types[0] == input.data[0].GetType()); // generate the keys for the given input - ArenaAllocator arena_allocator(Allocator::DefaultAllocator()); + ArenaAllocator arena_allocator(BufferAllocator::Get(db)); vector keys(input.size()); GenerateKeys(arena_allocator, input, keys); @@ -69485,7 +69722,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) { estimated_art_size -= released_memory; // then generate the keys for the given input - ArenaAllocator arena_allocator(Allocator::DefaultAllocator()); + ArenaAllocator arena_allocator(BufferAllocator::Get(db)); vector keys(expression.size()); GenerateKeys(arena_allocator, expression, keys); @@ -69729,7 +69966,7 @@ bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table // FIXME: the key directly owning the data for a single key might be more efficient D_ASSERT(state->values[0].type().InternalType() == types[0]); - ArenaAllocator arena_allocator(Allocator::DefaultAllocator()); + ArenaAllocator arena_allocator(Allocator::Get(db)); auto key = CreateKey(arena_allocator, types[0], state->values[0]); if (state->values[1].IsNull()) { @@ -69804,7 +70041,7 @@ void ART::VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, str ExecuteExpressions(chunk, expression_chunk); // generate the keys for the given input - ArenaAllocator arena_allocator(Allocator::DefaultAllocator()); + ArenaAllocator arena_allocator(BufferAllocator::Get(db)); vector keys(expression_chunk.size()); GenerateKeys(arena_allocator, expression_chunk, keys); @@ -71982,7 +72219,7 @@ class JoinHashTable { mutex pinned_handles_lock; vector pinned_handles; //! The hash map of the HT, created after finalization - BufferHandle hash_map; + AllocatedData hash_map; //! Whether or not NULL values are considered equal in each of the comparisons vector null_values_are_equal; @@ -72066,9 +72303,10 @@ class JoinHashTable { idx_t SwizzledSize() const { return swizzled_block_collection->SizeInBytes() + swizzled_string_heap->SizeInBytes(); } - //! Capacity of the pointer table given the + //! Capacity of the pointer table given the ht count + //! (minimum of 1024 to prevent collision chance for small HT's) static idx_t PointerTableCapacity(idx_t count) { - return NextPowerOfTwo(MaxValue(count * 2, (Storage::BLOCK_SIZE / sizeof(data_ptr_t)) + 1)); + return MaxValue(NextPowerOfTwo(count * 2), 1 << 10); } //! Swizzle the blocks in this HT (moves from block_collection and string_heap to swizzled_...) 
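The PointerTableCapacity change above replaces the old block-size floor with a fixed 1024-entry minimum. A self-contained sketch of the arithmetic, with stand-ins for DuckDB's NextPowerOfTwo/MaxValue helpers (assumed semantics):

#include <cstdint>
// Sketch: capacity is the next power of two >= 2 * count, but never below
// 1 << 10, so even tiny hash tables keep their collision chains short.
static uint64_t NextPowerOfTwoSketch(uint64_t v) {
    uint64_t p = 1;
    while (p < v) {
        p <<= 1;
    }
    return p;
}
static uint64_t PointerTableCapacitySketch(uint64_t count) {
    const uint64_t doubled = NextPowerOfTwoSketch(count * 2);
    return doubled < 1024 ? 1024 : doubled;
    // e.g. count = 100  -> max(256, 1024)   = 1024
    //      count = 5000 -> max(16384, 1024) = 16384
}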
@@ -72239,7 +72477,7 @@ void JoinHashTable::ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx auto hash_data = (hash_t *)hdata.data; auto result_data = FlatVector::GetData(pointers); - auto main_ht = (data_ptr_t *)hash_map.Ptr(); + auto main_ht = (data_ptr_t *)hash_map.get(); for (idx_t i = 0; i < count; i++) { auto rindex = sel.get_index(i); auto hindex = hdata.sel->get_index(rindex); @@ -72421,7 +72659,7 @@ void JoinHashTable::InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_loc hashes.Flatten(count); D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR); - auto pointers = (atomic *)hash_map.Ptr(); + auto pointers = (atomic *)hash_map.get(); auto indices = FlatVector::GetData(hashes); if (parallel) { @@ -72438,19 +72676,19 @@ void JoinHashTable::InitializePointerTable() { D_ASSERT((capacity & (capacity - 1)) == 0); bitmask = capacity - 1; - if (!hash_map.IsValid()) { + if (!hash_map.get()) { // allocate the HT if not yet done - hash_map = buffer_manager.Allocate(capacity * sizeof(data_ptr_t)); + hash_map = buffer_manager.GetBufferAllocator().Allocate(capacity * sizeof(data_ptr_t)); } - D_ASSERT(hash_map.GetFileBuffer().size >= capacity * sizeof(data_ptr_t)); + D_ASSERT(hash_map.GetSize() == capacity * sizeof(data_ptr_t)); // initialize HT with all-zero entries - memset(hash_map.Ptr(), 0, capacity * sizeof(data_ptr_t)); + memset(hash_map.get(), 0, capacity * sizeof(data_ptr_t)); } void JoinHashTable::Finalize(idx_t block_idx_start, idx_t block_idx_end, bool parallel) { // Pointer table should be allocated - D_ASSERT(hash_map.IsValid()); + D_ASSERT(hash_map.get()); vector local_pinned_handles; @@ -73332,7 +73570,8 @@ ProbeSpillLocalState ProbeSpill::RegisterThread() { result.local_partition = local_partitions.back().get(); result.local_partition_append_state = local_partition_append_states.back().get(); } else { - local_spill_collections.emplace_back(make_unique(context, probe_types)); + local_spill_collections.emplace_back( + make_unique(BufferManager::GetBufferManager(context), probe_types)); local_spill_append_states.emplace_back(make_unique()); local_spill_collections.back()->InitializeAppend(*local_spill_append_states.back()); @@ -73363,7 +73602,8 @@ void ProbeSpill::Finalize() { local_partition_append_states.clear(); } else { if (local_spill_collections.empty()) { - global_spill_collection = make_unique(context, probe_types); + global_spill_collection = + make_unique(BufferManager::GetBufferManager(context), probe_types); } else { global_spill_collection = move(local_spill_collections[0]); for (idx_t i = 1; i < local_spill_collections.size(); i++) { @@ -73380,7 +73620,8 @@ void ProbeSpill::PrepareNextProbe() { auto &partitions = global_partitions->GetPartitions(); if (partitions.empty() || ht.partition_start == partitions.size()) { // Can't probe, just make an empty one - global_spill_collection = make_unique(context, probe_types); + global_spill_collection = + make_unique(BufferManager::GetBufferManager(context), probe_types); } else { // Move specific partitions to the global spill collection global_spill_collection = move(partitions[ht.partition_start]); @@ -73654,6 +73895,44 @@ static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t r } } +static void MarkJoinNested(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[], + ExpressionType comparison_type) { + Vector left_reference(left.GetType()); + SelectionVector true_sel(rcount); + for (idx_t i = 0; i < lcount; i++) { + if (found_match[i]) { + continue; + } + 
ConstantVector::Reference(left_reference, left, i, rcount); + idx_t count; + switch (comparison_type) { + case ExpressionType::COMPARE_EQUAL: + count = VectorOperations::Equals(left_reference, right, nullptr, rcount, nullptr, nullptr); + break; + case ExpressionType::COMPARE_NOTEQUAL: + count = VectorOperations::NotEquals(left_reference, right, nullptr, rcount, nullptr, nullptr); + break; + case ExpressionType::COMPARE_LESSTHAN: + count = VectorOperations::LessThan(left_reference, right, nullptr, rcount, nullptr, nullptr); + break; + case ExpressionType::COMPARE_GREATERTHAN: + count = VectorOperations::GreaterThan(left_reference, right, nullptr, rcount, nullptr, nullptr); + break; + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + count = VectorOperations::LessThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr); + break; + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + count = VectorOperations::GreaterThanEquals(left_reference, right, nullptr, rcount, nullptr, nullptr); + break; + default: + throw InternalException("Unsupported comparison type for MarkJoinNested"); + } + if (count > 0) { + found_match[i] = true; + } + } +} + template static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) { switch (left.GetType().InternalType()) { @@ -73689,6 +73968,13 @@ static void MarkJoinSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcou static void MarkJoinComparisonSwitch(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[], ExpressionType comparison_type) { + switch (left.GetType().InternalType()) { + case PhysicalType::STRUCT: + case PhysicalType::LIST: + return MarkJoinNested(left, right, lcount, rcount, found_match, comparison_type); + default: + break; + } D_ASSERT(left.GetType() == right.GetType()); switch (comparison_type) { case ExpressionType::COMPARE_EQUAL: @@ -79794,6 +80080,7 @@ class LimitPercentOperatorState : public GlobalSourceState { public: explicit LimitPercentOperatorState(const PhysicalLimitPercent &op) : limit(DConstants::INVALID_INDEX), current_offset(0) { + D_ASSERT(op.sink_state); auto &gstate = (LimitPercentGlobalState &)*op.sink_state; gstate.data.InitializeScan(scan_state); } @@ -80972,7 +81259,12 @@ void PhysicalTransaction::GetData(ExecutionContext &context, DataChunk &chunk, G LocalSourceState &lstate) const { auto &client = context.client; - switch (info->type) { + auto type = info->type; + if (type == TransactionType::COMMIT && ValidChecker::IsInvalidated(client.ActiveTransaction())) { + // transaction is invalidated - turn COMMIT into ROLLBACK + type = TransactionType::ROLLBACK; + } + switch (type) { case TransactionType::BEGIN_TRANSACTION: { if (client.transaction.IsAutoCommit()) { // start the active transaction @@ -81119,6 +81411,7 @@ class LogicalGet : public LogicalOperator { void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override; @@ -81257,6 +81550,7 @@ class PhysicalJoin : public CachingPhysicalOperator { public: bool EmptyResultIfRHSIsEmpty() const; + static bool HasNullValues(DataChunk &chunk); static void ConstructSemiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]); static void ConstructAntiJoinResult(DataChunk &left, DataChunk &result, bool found_match[]); static void ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left, DataChunk &result, bool found_match[], @@ 
-83260,6 +83554,10 @@ class HashJoinFinalizeEvent : public BasePipelineEvent { }; void HashJoinGlobalSinkState::ScheduleFinalize(Pipeline &pipeline, Event &event) { + if (hash_table->Count() == 0) { + hash_table->finalized = true; + return; + } hash_table->InitializePointerTable(); auto new_event = make_shared(pipeline, *this); event.InsertEvent(move(new_event)); @@ -85258,7 +85556,7 @@ namespace duckdb { class IndexJoinOperatorState : public CachingOperatorState { public: IndexJoinOperatorState(ClientContext &context, const PhysicalIndexJoin &op) - : probe_executor(context), arena_allocator(Allocator::Get(context)), keys(STANDARD_VECTOR_SIZE) { + : probe_executor(context), arena_allocator(BufferAllocator::Get(context)), keys(STANDARD_VECTOR_SIZE) { auto &allocator = Allocator::Get(context); rhs_rows.resize(STANDARD_VECTOR_SIZE); result_sizes.resize(STANDARD_VECTOR_SIZE); @@ -85626,7 +85924,7 @@ class PhysicalNestedLoopJoin : public PhysicalComparisonJoin { return true; } - static bool IsSupported(const vector &conditions); + static bool IsSupported(const vector &conditions, JoinType join_type); public: //! Returns a list of the types of the join conditions @@ -85660,7 +85958,7 @@ PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, unique_ptr
&conditions) { +bool PhysicalNestedLoopJoin::IsSupported(const vector &conditions, JoinType join_type) { + if (join_type == JoinType::MARK) { + return true; + } for (auto &cond : conditions) { if (cond.left->return_type.InternalType() == PhysicalType::STRUCT || cond.left->return_type.InternalType() == PhysicalType::LIST) { @@ -85793,7 +86094,7 @@ class NestedLoopJoinGlobalState : public GlobalSinkState { //! Materialized join condition of the RHS ColumnDataCollection right_condition_data; //! Whether or not the RHS of the nested loop join has NULL values - bool has_null; + atomic has_null; //! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN) OuterJoinMarker right_outer; }; @@ -91849,6 +92150,7 @@ class PhysicalBatchInsert : public PhysicalOperator { + namespace duckdb { PhysicalBatchInsert::PhysicalBatchInsert(vector types, TableCatalogEntry *table, @@ -91891,22 +92193,9 @@ class CollectionMerger { if (Empty()) { return nullptr; } - unique_ptr new_collection; - if (current_collections.size() == 1) { - // we have gathered only one row group collection: merge it directly - new_collection = move(current_collections[0]); - } else { + unique_ptr new_collection = move(current_collections[0]); + if (current_collections.size() > 1) { // we have gathered multiple collections: create one big collection and merge that - // find the biggest collection - idx_t biggest_index = 0; - for (idx_t i = 1; i < current_collections.size(); i++) { - D_ASSERT(current_collections[i]); - if (current_collections[i]->GetTotalRows() > current_collections[biggest_index]->GetTotalRows()) { - biggest_index = i; - } - } - // now append all the other collections to this collection - new_collection = move(current_collections[biggest_index]); auto &types = new_collection->GetTypes(); TableAppendState append_state; new_collection->InitializeAppend(append_state); @@ -95377,15 +95666,14 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context) for (auto &pipeline : pipelines) { auto sink = pipeline->GetSink(); if (sink != this) { - // reset the sink state for any intermediate sinks - sink->sink_state = sink->GetGlobalSinkState(context.client); + sink->sink_state.reset(); } for (auto &op : pipeline->GetOperators()) { if (op) { - op->op_state = op->GetGlobalOperatorState(context.client); + op->op_state.reset(); } } - pipeline->ResetSource(true); + pipeline->ClearSource(); } // get the MetaPipelines in the recursive_meta_pipeline and reschedule them @@ -96401,6 +96689,7 @@ class LogicalAggregate : public LogicalOperator { void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); idx_t EstimateCardinality(ClientContext &context) override; + vector GetTableIndex() const override; protected: void ResolveTypes() override; @@ -96683,6 +96972,7 @@ class LogicalColumnDataGet : public LogicalOperator { void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override { @@ -96952,6 +97242,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalComparison op.estimated_cardinality, perfect_join_stats); } else { + static constexpr const idx_t NESTED_LOOP_JOIN_THRESHOLD = 5; bool can_merge = has_range > 0; bool can_iejoin = has_range >= 2 && recursive_cte_tables.empty(); switch (op.join_type) { @@ -96964,6 +97255,11 @@ unique_ptr 
PhysicalPlanGenerator::CreatePlan(LogicalComparison default: break; } + if (left->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD || + right->estimated_cardinality <= NESTED_LOOP_JOIN_THRESHOLD) { + can_iejoin = false; + can_merge = false; + } if (can_iejoin) { plan = make_unique(op, move(left), move(right), move(op.conditions), op.join_type, op.estimated_cardinality); @@ -96971,7 +97267,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalComparison // range join: use piecewise merge join plan = make_unique(op, move(left), move(right), move(op.conditions), op.join_type, op.estimated_cardinality); - } else if (PhysicalNestedLoopJoin::IsSupported(op.conditions)) { + } else if (PhysicalNestedLoopJoin::IsSupported(op.conditions, op.join_type)) { // inequality join: use nested loop plan = make_unique(op, move(left), move(right), move(op.conditions), op.join_type, op.estimated_cardinality); @@ -97195,7 +97491,6 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalCreateInde - //===----------------------------------------------------------------------===// // DuckDB // @@ -97237,25 +97532,11 @@ class LogicalCreateTable : public LogicalOperator { -namespace duckdb { -static void ExtractDependencies(Expression &expr, unordered_set &dependencies) { - if (expr.type == ExpressionType::BOUND_FUNCTION) { - auto &function = (BoundFunctionExpression &)expr; - if (function.function.dependency) { - function.function.dependency(function, dependencies); - } - } - ExpressionIterator::EnumerateChildren(expr, [&](Expression &child) { ExtractDependencies(child, dependencies); }); -} + +namespace duckdb { unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalCreateTable &op) { - // extract dependencies from any default values - for (auto &default_value : op.info->bound_defaults) { - if (default_value) { - ExtractDependencies(*default_value, op.info->dependencies); - } - } auto &create_info = (CreateTableInfo &)*op.info->base; auto &catalog = Catalog::GetCatalog(context); auto existing_entry = @@ -97266,13 +97547,14 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalCreateTabl bool parallel_streaming_insert = !PreserveInsertionOrder(*plan); bool use_batch_index = UseBatchIndex(*plan); + auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads(); unique_ptr create; if (!parallel_streaming_insert && use_batch_index) { create = make_unique(op, op.schema, move(op.info), op.estimated_cardinality); } else { create = make_unique(op, op.schema, move(op.info), op.estimated_cardinality, - parallel_streaming_insert); + parallel_streaming_insert && num_threads > 1); } D_ASSERT(op.children.size() == 1); @@ -97354,8 +97636,9 @@ namespace duckdb { class LogicalDelete : public LogicalOperator { public: - explicit LogicalDelete(TableCatalogEntry *table) - : LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(0), return_chunk(false) { + explicit LogicalDelete(TableCatalogEntry *table, idx_t table_index) + : LogicalOperator(LogicalOperatorType::LOGICAL_DELETE), table(table), table_index(table_index), + return_chunk(false) { } TableCatalogEntry *table; @@ -97366,6 +97649,7 @@ class LogicalDelete : public LogicalOperator { void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); idx_t EstimateCardinality(ClientContext &context) override; + vector GetTableIndex() const override; protected: vector GetColumnBindings() override { @@ -97442,6 +97726,7 @@ class LogicalDelimGet : public LogicalOperator { } 
void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override { @@ -97682,6 +97967,7 @@ class LogicalDummyScan : public LogicalOperator { } void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override { @@ -98065,6 +98351,7 @@ class LogicalExpressionGet : public LogicalOperator { idx_t EstimateCardinality(ClientContext &context) override { return expressions.size(); } + vector GetTableIndex() const override; protected: void ResolveTypes() override { @@ -98736,8 +99023,9 @@ namespace duckdb { //! LogicalInsert represents an insertion of data into a base table class LogicalInsert : public LogicalOperator { public: - explicit LogicalInsert(TableCatalogEntry *table) - : LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(0), return_chunk(false) { + LogicalInsert(TableCatalogEntry *table, idx_t table_index) + : LogicalOperator(LogicalOperatorType::LOGICAL_INSERT), table(table), table_index(table_index), + return_chunk(false) { } vector>> insert_values; @@ -98774,6 +99062,7 @@ class LogicalInsert : public LogicalOperator { } idx_t EstimateCardinality(ClientContext &context) override; + vector GetTableIndex() const override; }; } // namespace duckdb @@ -98828,6 +99117,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalInsert &op bool parallel_streaming_insert = !PreserveInsertionOrder(*plan); bool use_batch_index = UseBatchIndex(*plan); + auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads(); if (op.return_chunk) { // not supported for RETURNING (yet?) 
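LogicalInsert, like LogicalDelete earlier in this hunk, now takes its table index at construction instead of defaulting to 0, so the GetTableIndex() overrides added throughout this patch report real, unique indexes. A hedged sketch of the binder-side call, assuming Binder::GenerateTableIndex() as the per-plan index source (usage is illustrative, not the exact DuckDB bind code):

// Illustrative only: the bind phase reserves a fresh index and threads it
// through the constructor; ColumnBindingResolver::Verify can then assert
// plan-wide uniqueness of table indexes in debug builds.
unique_ptr<LogicalOperator> BindInsertSketch(Binder &binder, TableCatalogEntry *table,
                                             unique_ptr<LogicalOperator> root) {
    auto table_index = binder.GenerateTableIndex();
    auto insert = make_unique<LogicalInsert>(table, table_index);
    insert->AddChild(move(root));
    return move(insert);
}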
parallel_streaming_insert = false; @@ -98839,7 +99129,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalInsert &op op.estimated_cardinality); } else { insert = make_unique(op.types, op.table, op.column_index_map, move(op.bound_defaults), - op.estimated_cardinality, op.return_chunk, parallel_streaming_insert); + op.estimated_cardinality, op.return_chunk, + parallel_streaming_insert && num_threads > 1); } if (plan) { insert->children.push_back(move(plan)); @@ -99182,6 +99473,7 @@ class LogicalProjection : public LogicalOperator { vector GetColumnBindings() override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override; @@ -99271,6 +99563,7 @@ class LogicalRecursiveCTE : public LogicalOperator { } void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override { @@ -99318,6 +99611,7 @@ class LogicalCTERef : public LogicalOperator { } void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override { @@ -99530,6 +99824,7 @@ class LogicalSetOperation : public LogicalOperator { void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override { @@ -99870,6 +100165,7 @@ class LogicalUnnest : public LogicalOperator { vector GetColumnBindings() override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override; @@ -99997,6 +100293,7 @@ class LogicalWindow : public LogicalOperator { vector GetColumnBindings() override; void Serialize(FieldWriter &writer) const override; static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + vector GetTableIndex() const override; protected: void ResolveTypes() override; @@ -100155,6 +100452,8 @@ struct LogicalExtensionOperator : public LogicalOperator { : LogicalOperator(LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR, move(expressions)) { } + static unique_ptr Deserialize(LogicalDeserializationState &state, FieldReader &reader); + virtual unique_ptr CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) = 0; }; } // namespace duckdb @@ -106606,7 +106905,8 @@ struct Interpolator { template <> struct Interpolator { Interpolator(const double q, const idx_t n_p) - : n(n_p), RN((double)(n_p - 1) * q), FRN(floor(RN)), CRN(FRN), begin(0), end(n_p) { + : n(n_p), RN((double)(n_p * q)), FRN(MaxValue(1, n_p - floor(n_p - RN)) - 1), CRN(FRN), begin(0), + end(n_p) { } template > @@ -108444,13 +108744,16 @@ struct LinkedList { // forward declarations struct WriteDataToSegment; struct ReadDataFromSegment; +struct CopyDataFromSegment; typedef ListSegment *(*create_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator, - vector &owning_vector, uint16_t &capacity); + vector &owning_vector, const uint16_t &capacity); typedef void (*write_data_to_segment_t)(WriteDataToSegment &write_data_to_segment, Allocator &allocator, 
vector &owning_vector, ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count); -typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, ListSegment *segment, +typedef void (*read_data_from_segment_t)(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment, Vector &result, idx_t &total_count); +typedef ListSegment *(*copy_data_from_segment_t)(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source, + Allocator &allocator, vector &owning_vector); struct WriteDataToSegment { create_segment_t create_segment; @@ -108461,6 +108764,10 @@ struct ReadDataFromSegment { read_data_from_segment_t segment_function; vector child_functions; }; +struct CopyDataFromSegment { + copy_data_from_segment_t segment_function; + vector child_functions; +}; // forward declarations static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allocator, @@ -108468,24 +108775,27 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo idx_t &count); static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedList *linked_list, Vector &result, idx_t &initial_total_count); +static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list, + LinkedList &target_list, Allocator &allocator, vector &owning_vector); template static data_ptr_t AllocatePrimitiveData(Allocator &allocator, vector &owning_vector, - uint16_t &capacity) { + const uint16_t &capacity) { owning_vector.emplace_back(allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(T)))); return owning_vector.back().get(); } -static data_ptr_t AllocateListData(Allocator &allocator, vector &owning_vector, uint16_t &capacity) { +static data_ptr_t AllocateListData(Allocator &allocator, vector &owning_vector, + const uint16_t &capacity) { owning_vector.emplace_back( allocator.Allocate(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList))); return owning_vector.back().get(); } -static data_ptr_t AllocateStructData(Allocator &allocator, vector &owning_vector, uint16_t &capacity, - idx_t child_count) { +static data_ptr_t AllocateStructData(Allocator &allocator, vector &owning_vector, + const uint16_t &capacity, const idx_t &child_count) { owning_vector.emplace_back( allocator.Allocate(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *))); @@ -108493,28 +108803,28 @@ static data_ptr_t AllocateStructData(Allocator &allocator, vector } template -static T *GetPrimitiveData(ListSegment *segment) { +static T *GetPrimitiveData(const ListSegment *segment) { return (T *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool)); } -static uint64_t *GetListLengthData(ListSegment *segment) { +static uint64_t *GetListLengthData(const ListSegment *segment) { return (uint64_t *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool)); } -static LinkedList *GetListChildData(ListSegment *segment) { +static LinkedList *GetListChildData(const ListSegment *segment) { return (LinkedList *)(((char *)segment) + sizeof(ListSegment) + segment->capacity * (sizeof(bool) + sizeof(uint64_t))); } -static ListSegment **GetStructData(ListSegment *segment) { +static ListSegment **GetStructData(const ListSegment *segment) { return (ListSegment **)(((char *)segment) + sizeof(ListSegment) + segment->capacity * sizeof(bool)); } -static bool *GetNullMask(ListSegment *segment) { +static bool *GetNullMask(const ListSegment
*segment) { return (bool *)(((char *)segment) + sizeof(ListSegment)); } -static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) { +static uint16_t GetCapacityForNewSegment(const LinkedList *linked_list) { // consecutive segments grow by the power of two uint16_t capacity = 4; @@ -108527,7 +108837,7 @@ static uint16_t GetCapacityForNewSegment(LinkedList *linked_list) { template static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allocator, - vector &owning_vector, uint16_t &capacity) { + vector &owning_vector, const uint16_t &capacity) { // allocate data and set the header auto segment = (ListSegment *)AllocatePrimitiveData(allocator, owning_vector, capacity); @@ -108538,7 +108848,7 @@ static ListSegment *CreatePrimitiveSegment(WriteDataToSegment &, Allocator &allo } static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator, vector &owning_vector, - uint16_t &capacity) { + const uint16_t &capacity) { // allocate data and set the header auto segment = (ListSegment *)AllocateListData(allocator, owning_vector, capacity); @@ -108555,7 +108865,7 @@ static ListSegment *CreateListSegment(WriteDataToSegment &, Allocator &allocator } static ListSegment *CreateStructSegment(WriteDataToSegment &write_data_to_segment, Allocator &allocator, - vector &owning_vector, uint16_t &capacity) { + vector &owning_vector, const uint16_t &capacity) { // allocate data and set header auto segment = (ListSegment *)AllocateStructData(allocator, owning_vector, capacity, @@ -108749,7 +109059,7 @@ static void AppendRow(WriteDataToSegment &write_data_to_segment, Allocator &allo } template -static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result, +static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result, idx_t &total_count) { auto &aggr_vector_validity = FlatVector::Validity(result); @@ -108773,7 +109083,7 @@ static void ReadDataFromPrimitiveSegment(ReadDataFromSegment &, ListSegment *seg } } -static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segment, Vector &result, +static void ReadDataFromVarcharSegment(ReadDataFromSegment &, const ListSegment *segment, Vector &result, idx_t &total_count) { auto &aggr_vector_validity = FlatVector::Validity(result); @@ -108814,8 +109124,8 @@ static void ReadDataFromVarcharSegment(ReadDataFromSegment &, ListSegment *segme } } -static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment, Vector &result, - idx_t &total_count) { +static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment, + Vector &result, idx_t &total_count) { auto &aggr_vector_validity = FlatVector::Validity(result); @@ -108854,8 +109164,8 @@ static void ReadDataFromListSegment(ReadDataFromSegment &read_data_from_segment, BuildListVector(read_data_from_segment.child_functions[0], &linked_child_list, child_vector, starting_offset); } -static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, ListSegment *segment, Vector &result, - idx_t &total_count) { +static void ReadDataFromStructSegment(ReadDataFromSegment &read_data_from_segment, const ListSegment *segment, + Vector &result, idx_t &total_count) { auto &aggr_vector_validity = FlatVector::Validity(result); @@ -108894,6 +109204,86 @@ static void BuildListVector(ReadDataFromSegment &read_data_from_segment, LinkedL linked_list->last_segment = nullptr; } +template +static ListSegment 
*CopyDataFromPrimitiveSegment(CopyDataFromSegment &, const ListSegment *source, Allocator &allocator, + vector &owning_vector) { + + auto target = (ListSegment *)AllocatePrimitiveData(allocator, owning_vector, source->capacity); + memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T))); + target->next = nullptr; + return target; +} + +static ListSegment *CopyDataFromListSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source, + Allocator &allocator, vector &owning_vector) { + + // create an empty linked list for the child vector of target + auto source_linked_child_list = Load((data_ptr_t)GetListChildData(source)); + + // create the segment + auto target = (ListSegment *)AllocateListData(allocator, owning_vector, source->capacity); + memcpy(target, source, + sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList)); + target->next = nullptr; + + auto target_linked_list = GetListChildData(target); + LinkedList linked_list(source_linked_child_list.total_capacity, nullptr, nullptr); + Store(linked_list, (data_ptr_t)target_linked_list); + + // recurse to copy the linked child list + auto target_linked_child_list = Load((data_ptr_t)GetListChildData(target)); + D_ASSERT(copy_data_from_segment.child_functions.size() == 1); + CopyLinkedList(copy_data_from_segment.child_functions[0], &source_linked_child_list, target_linked_child_list, + allocator, owning_vector); + + // store the updated linked list + Store(target_linked_child_list, (data_ptr_t)GetListChildData(target)); + return target; +} + +static ListSegment *CopyDataFromStructSegment(CopyDataFromSegment &copy_data_from_segment, const ListSegment *source, + Allocator &allocator, vector &owning_vector) { + + auto source_child_count = copy_data_from_segment.child_functions.size(); + auto target = (ListSegment *)AllocateStructData(allocator, owning_vector, source->capacity, source_child_count); + memcpy(target, source, + sizeof(ListSegment) + source->capacity * sizeof(bool) + source_child_count * sizeof(ListSegment *)); + target->next = nullptr; + + // recurse and copy the children + auto source_child_segments = GetStructData(source); + auto target_child_segments = GetStructData(target); + + for (idx_t i = 0; i < copy_data_from_segment.child_functions.size(); i++) { + auto child_function = copy_data_from_segment.child_functions[i]; + auto source_child_segment = Load((data_ptr_t)(source_child_segments + i)); + auto target_child_segment = + child_function.segment_function(child_function, source_child_segment, allocator, owning_vector); + Store(target_child_segment, (data_ptr_t)(target_child_segments + i)); + } + return target; +} + +static void CopyLinkedList(CopyDataFromSegment &copy_data_from_segment, const LinkedList *source_list, + LinkedList &target_list, Allocator &allocator, vector &owning_vector) { + + auto source_segment = source_list->first_segment; + + while (source_segment) { + auto target_segment = + copy_data_from_segment.segment_function(copy_data_from_segment, source_segment, allocator, owning_vector); + source_segment = source_segment->next; + + if (!target_list.first_segment) { + target_list.first_segment = target_segment; + } + if (target_list.last_segment) { + target_list.last_segment->next = target_segment; + } + target_list.last_segment = target_segment; + } +} + static void InitializeValidities(Vector &vector, idx_t &capacity) { auto &validity_mask = FlatVector::Validity(vector); @@ -108937,6 +109327,7 @@ struct ListBindData : public
FunctionData { LogicalType stype; WriteDataToSegment write_data_to_segment; ReadDataFromSegment read_data_from_segment; + CopyDataFromSegment copy_data_from_segment; unique_ptr Copy() const override { return make_unique(stype); @@ -108949,7 +109340,8 @@ struct ListBindData : public FunctionData { }; static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment, - ReadDataFromSegment &read_data_from_segment, const LogicalType &type) { + ReadDataFromSegment &read_data_from_segment, + CopyDataFromSegment &copy_data_from_segment, const LogicalType &type) { auto physical_type = type.InternalType(); switch (physical_type) { @@ -108958,113 +109350,135 @@ static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment, write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::INT8: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::INT16: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::INT32: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::INT64: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::UINT8: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::UINT16: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::UINT32: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::UINT64: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function =
CopyDataFromPrimitiveSegment; break; } case PhysicalType::FLOAT: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::DOUBLE: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::INT128: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::INTERVAL: { write_data_to_segment.create_segment = CreatePrimitiveSegment; write_data_to_segment.segment_function = WriteDataToPrimitiveSegment; read_data_from_segment.segment_function = ReadDataFromPrimitiveSegment; + copy_data_from_segment.segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::VARCHAR: { write_data_to_segment.create_segment = CreateListSegment; write_data_to_segment.segment_function = WriteDataToVarcharSegment; read_data_from_segment.segment_function = ReadDataFromVarcharSegment; + copy_data_from_segment.segment_function = CopyDataFromListSegment; write_data_to_segment.child_functions.emplace_back(WriteDataToSegment()); write_data_to_segment.child_functions.back().create_segment = CreatePrimitiveSegment; + copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment()); + copy_data_from_segment.child_functions.back().segment_function = CopyDataFromPrimitiveSegment; break; } case PhysicalType::LIST: { write_data_to_segment.create_segment = CreateListSegment; write_data_to_segment.segment_function = WriteDataToListSegment; read_data_from_segment.segment_function = ReadDataFromListSegment; + copy_data_from_segment.segment_function = CopyDataFromListSegment; // recurse write_data_to_segment.child_functions.emplace_back(WriteDataToSegment()); read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment()); + copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment()); GetSegmentDataFunctions(write_data_to_segment.child_functions.back(), - read_data_from_segment.child_functions.back(), ListType::GetChildType(type)); + read_data_from_segment.child_functions.back(), + copy_data_from_segment.child_functions.back(), ListType::GetChildType(type)); break; } case PhysicalType::STRUCT: { write_data_to_segment.create_segment = CreateStructSegment; write_data_to_segment.segment_function = WriteDataToStructSegment; read_data_from_segment.segment_function = ReadDataFromStructSegment; + copy_data_from_segment.segment_function = CopyDataFromStructSegment; // recurse auto child_types = StructType::GetChildTypes(type); for (idx_t i = 0; i < child_types.size(); i++) { write_data_to_segment.child_functions.emplace_back(WriteDataToSegment()); read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment()); + copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment()); GetSegmentDataFunctions(write_data_to_segment.child_functions.back(), - read_data_from_segment.child_functions.back(), child_types[i].second); + 
read_data_from_segment.child_functions.back(), + copy_data_from_segment.child_functions.back(), child_types[i].second); } break; } @@ -109077,7 +109491,7 @@ ListBindData::ListBindData(const LogicalType &stype_p) : stype(stype_p) { // always unnest once because the result vector is of type LIST auto type = ListType::GetChildType(stype_p); - GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, type); + GetSegmentDataFunctions(write_data_to_segment, read_data_from_segment, copy_data_from_segment, type); } ListBindData::~ListBindData() { @@ -109145,11 +109559,13 @@ static void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_d } } -static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &, idx_t count) { +static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) { UnifiedVectorFormat sdata; state.ToUnifiedFormat(count, sdata); auto states_ptr = (ListAggState **)sdata.data; + auto &list_bind_data = (ListBindData &)*aggr_input_data.bind_data; + auto combined_ptr = FlatVector::GetData(combined); for (idx_t i = 0; i < count; i++) { auto state = states_ptr[sdata.sel->get_index(i)]; @@ -109159,32 +109575,27 @@ static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputD } D_ASSERT(state->type); D_ASSERT(state->owning_vector); - if (!combined_ptr[i]->linked_list) { - // copy the linked list + if (!combined_ptr[i]->linked_list) { combined_ptr[i]->linked_list = new LinkedList(0, nullptr, nullptr); - combined_ptr[i]->linked_list->first_segment = state->linked_list->first_segment; - combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment; - combined_ptr[i]->linked_list->total_capacity = state->linked_list->total_capacity; - - // copy the type + combined_ptr[i]->owning_vector = new vector; combined_ptr[i]->type = new LogicalType(*state->type); + } + auto owning_vector = combined_ptr[i]->owning_vector; - // new owning_vector to hold the unique pointers - combined_ptr[i]->owning_vector = new vector; + // copy the linked list of the state + auto copied_linked_list = LinkedList(state->linked_list->total_capacity, nullptr, nullptr); + CopyLinkedList(list_bind_data.copy_data_from_segment, state->linked_list, copied_linked_list, + aggr_input_data.allocator, *owning_vector); + // append the copied linked list to the combined state + if (combined_ptr[i]->linked_list->last_segment) { + combined_ptr[i]->linked_list->last_segment->next = copied_linked_list.first_segment; } else { - combined_ptr[i]->linked_list->last_segment->next = state->linked_list->first_segment; - combined_ptr[i]->linked_list->last_segment = state->linked_list->last_segment; - combined_ptr[i]->linked_list->total_capacity += state->linked_list->total_capacity; - } - - // copy the owning vector (and its unique pointers to the allocated data) - // FIXME: more efficient way of copying the unique pointers? 
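The rewritten combine step in this hunk stops splicing segments still owned by the source aggregate state and instead deep-copies them into the destination's ownership before linking. A reduced sketch of that append-after-copy bookkeeping, using simplified stand-in types rather than DuckDB's ListSegment/LinkedList:

#include <cstdint>
// Sketch: the copied chain already ends at copied.last, so combining is just
// tail-splicing plus capacity accounting; the source state keeps its own
// allocations, which is what makes destroying either state safe afterwards.
struct SegmentSketch {
    SegmentSketch *next = nullptr;
};
struct ChainSketch {
    SegmentSketch *first = nullptr;
    SegmentSketch *last = nullptr;
    uint64_t total_capacity = 0;
};
static void AppendCopiedChain(ChainSketch &combined, const ChainSketch &copied) {
    if (combined.last) {
        combined.last->next = copied.first; // splice copy after current tail
    } else {
        combined.first = copied.first; // combined list was empty
    }
    combined.last = copied.last;
    combined.total_capacity += copied.total_capacity;
}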
- auto &owning_vector = *state->owning_vector; - for (idx_t j = 0; j < state->owning_vector->size(); j++) { - combined_ptr[i]->owning_vector->push_back(move(owning_vector[j])); + combined_ptr[i]->linked_list->first_segment = copied_linked_list.first_segment; } + combined_ptr[i]->linked_list->last_segment = copied_linked_list.last_segment; + combined_ptr[i]->linked_list->total_capacity += copied_linked_list.total_capacity; } } @@ -109448,10 +109859,11 @@ struct RegrCountFunction { namespace duckdb { void RegrCountFun::RegisterFunction(BuiltinFunctions &set) { - AggregateFunctionSet corr("regr_count"); - corr.AddFunction(AggregateFunction::BinaryAggregate( - LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::UINTEGER)); - set.AddFunction(corr); + auto regr_count = AggregateFunction::BinaryAggregate( + LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::UINTEGER); + regr_count.name = "regr_count"; + regr_count.null_handling = FunctionNullHandling::SPECIAL_HANDLING; + set.AddFunction(regr_count); } } // namespace duckdb @@ -116380,16 +116792,16 @@ struct StructDatePart { const auto idx = rdata.sel->get_index(i); if (arg_valid.RowIsValid(idx)) { if (Value::IsFinite(tdata[idx])) { - DatePart::StructOperator::Operation(part_values.data(), tdata[idx], idx, part_mask); + DatePart::StructOperator::Operation(part_values.data(), tdata[idx], i, part_mask); } else { for (auto &child_entry : child_entries) { - FlatVector::Validity(*child_entry).SetInvalid(idx); + FlatVector::Validity(*child_entry).SetInvalid(i); } } } else { - res_valid.SetInvalid(idx); + res_valid.SetInvalid(i); for (auto &child_entry : child_entries) { - FlatVector::Validity(*child_entry).SetInvalid(idx); + FlatVector::Validity(*child_entry).SetInvalid(i); } } } @@ -118557,15 +118969,16 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF } struct StrfTimeBindData : public FunctionData { - explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p) - : format(move(format_p)), format_string(move(format_string_p)) { + explicit StrfTimeBindData(StrfTimeFormat format_p, string format_string_p, bool is_null) + : format(move(format_p)), format_string(move(format_string_p)), is_null(is_null) { } StrfTimeFormat format; string format_string; + bool is_null; unique_ptr Copy() const override { - return make_unique(format, format_string); + return make_unique(format, format_string, is_null); } bool Equals(const FunctionData &other_p) const override { @@ -118588,13 +119001,14 @@ static unique_ptr StrfTimeBindFunction(ClientContext &context, Sca Value options_str = ExpressionExecutor::EvaluateScalar(context, *format_arg); auto format_string = options_str.GetValue(); StrfTimeFormat format; - if (!options_str.IsNull()) { + bool is_null = options_str.IsNull(); + if (!is_null) { string error = StrTimeFormat::ParseFormatSpecifier(format_string, format); if (!error.empty()) { throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error); } } - return make_unique(format, format_string); + return make_unique(format, format_string, is_null); } void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) { @@ -118621,7 +119035,7 @@ static void StrfTimeFunctionDate(DataChunk &args, ExpressionState &state, Vector auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (StrfTimeBindData &)*func_expr.bind_info; - if (ConstantVector::IsNull(args.data[REVERSED ? 
0 : 1])) { + if (info.is_null) { result.SetVectorType(VectorType::CONSTANT_VECTOR); ConstantVector::SetNull(result, true); return; @@ -118655,7 +119069,7 @@ static void StrfTimeFunctionTimestamp(DataChunk &args, ExpressionState &state, V auto &func_expr = (BoundFunctionExpression &)state.expr; auto &info = (StrfTimeBindData &)*func_expr.bind_info; - if (ConstantVector::IsNull(args.data[REVERSED ? 0 : 1])) { + if (info.is_null) { result.SetVectorType(VectorType::CONSTANT_VECTOR); ConstantVector::SetNull(result, true); return; @@ -132153,7 +132567,9 @@ static unique_ptr StructInsertBind(ClientContext &context, ScalarF unique_ptr StructInsertStats(ClientContext &context, FunctionStatisticsInput &input) { auto &child_stats = input.child_stats; auto &expr = input.expr; - + if (child_stats.empty() || !child_stats[0]) { + return nullptr; + } auto &existing_struct_stats = (StructStatistics &)*child_stats[0]; auto new_struct_stats = make_unique(expr.return_type); @@ -137624,6 +138040,9 @@ static unique_ptr DuckDBFunctionsBind(ClientContext &context, Tabl names.emplace_back("has_side_effects"); return_types.emplace_back(LogicalType::BOOLEAN); + names.emplace_back("function_oid"); + return_types.emplace_back(LogicalType::BIGINT); + return nullptr; } @@ -138010,6 +138429,9 @@ bool ExtractFunctionData(StandardEntry *entry, idx_t function_idx, DataChunk &ou // has_side_effects, LogicalType::BOOLEAN output.SetValue(9, output_offset, OP::HasSideEffects(function, function_idx)); + // function_oid, LogicalType::BIGINT + output.SetValue(10, output_offset, Value::BIGINT(entry->oid)); + return function_idx + 1 == OP::FunctionCount(function); } @@ -141040,14 +141462,16 @@ void UDFWrapper::RegisterAggrFunction(AggregateFunction aggr_function, ClientCon + namespace duckdb { -BaseAppender::BaseAppender(Allocator &allocator) : allocator(allocator), column(0) { +BaseAppender::BaseAppender(Allocator &allocator, AppenderType type_p) + : allocator(allocator), column(0), appender_type(type_p) { } -BaseAppender::BaseAppender(Allocator &allocator_p, vector types_p) +BaseAppender::BaseAppender(Allocator &allocator_p, vector types_p, AppenderType type_p) : allocator(allocator_p), types(move(types_p)), collection(make_unique(allocator, types)), - column(0) { + column(0), appender_type(type_p) { InitializeChunk(); } @@ -141067,7 +141491,8 @@ void BaseAppender::Destructor() { } InternalAppender::InternalAppender(ClientContext &context_p, TableCatalogEntry &table_p) - : BaseAppender(Allocator::DefaultAllocator(), table_p.GetTypes()), context(context_p), table(table_p) { + : BaseAppender(Allocator::DefaultAllocator(), table_p.GetTypes(), AppenderType::PHYSICAL), context(context_p), + table(table_p) { } InternalAppender::~InternalAppender() { @@ -141075,7 +141500,7 @@ InternalAppender::~InternalAppender() { } Appender::Appender(Connection &con, const string &schema_name, const string &table_name) - : BaseAppender(Allocator::DefaultAllocator()), context(con.context) { + : BaseAppender(Allocator::DefaultAllocator(), AppenderType::LOGICAL), context(con.context) { description = con.TableInfo(schema_name, table_name); if (!description) { // table could not be found @@ -141119,6 +141544,27 @@ void BaseAppender::AppendValueInternal(Vector &col, SRC input) { FlatVector::GetData(col)[chunk.size()] = Cast::Operation(input); } +template +void BaseAppender::AppendDecimalValueInternal(Vector &col, SRC input) { + switch (appender_type) { + case AppenderType::LOGICAL: { + auto &type = col.GetType(); + D_ASSERT(type.id() == 
LogicalTypeId::DECIMAL); + auto width = DecimalType::GetWidth(type); + auto scale = DecimalType::GetScale(type); + TryCastToDecimal::Operation(input, FlatVector::GetData(col)[chunk.size()], nullptr, width, + scale); + return; + } + case AppenderType::PHYSICAL: { + AppendValueInternal(col, input); + return; + } + default: + throw InternalException("Type not implemented for AppenderType"); + } +} + template void BaseAppender::AppendValueInternal(T input) { if (column >= types.size()) { @@ -141164,18 +141610,20 @@ void BaseAppender::AppendValueInternal(T input) { break; case LogicalTypeId::DECIMAL: switch (col.GetType().InternalType()) { - case PhysicalType::INT8: - AppendValueInternal(col, input); - break; case PhysicalType::INT16: - AppendValueInternal(col, input); + AppendDecimalValueInternal(col, input); break; case PhysicalType::INT32: - AppendValueInternal(col, input); + AppendDecimalValueInternal(col, input); break; - default: - AppendValueInternal(col, input); + case PhysicalType::INT64: + AppendDecimalValueInternal(col, input); break; + case PhysicalType::INT128: + AppendDecimalValueInternal(col, input); + break; + default: + throw InternalException("Internal type not recognized for Decimal"); } break; case LogicalTypeId::DATE: @@ -145033,6 +145481,10 @@ class Optimizer { private: void RunOptimizer(OptimizerType type, const std::function &callback); + void Verify(LogicalOperator &op); + +private: + unique_ptr plan; }; } // namespace duckdb @@ -145775,6 +146227,7 @@ unique_ptr ClientContext::ExtractPlan(const string &query) { } ColumnBindingResolver resolver; + resolver.Verify(*plan); resolver.VisitOperator(*plan); plan->ResolveOperatorTypes(); @@ -146866,6 +147319,14 @@ struct MaximumMemorySetting { static Value GetSetting(ClientContext &context); }; +struct PasswordSetting { + static constexpr const char *Name = "password"; + static constexpr const char *Description = "The password to use. Ignored for legacy compatibility."; + static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR; + static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter); + static Value GetSetting(ClientContext &context); +}; + struct PerfectHashThresholdSetting { static constexpr const char *Name = "perfect_ht_threshold"; static constexpr const char *Description = "Threshold in bytes for when to use a perfect hash table (default: 12)"; @@ -146961,6 +147422,14 @@ struct ThreadsSetting { static Value GetSetting(ClientContext &context); }; +struct UsernameSetting { + static constexpr const char *Name = "username"; + static constexpr const char *Description = "The username to use. 
Ignored for legacy compatibility."; + static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR; + static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter); + static Value GetSetting(ClientContext &context); +}; + } // namespace duckdb @@ -147010,6 +147479,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting DUCKDB_GLOBAL(MaximumMemorySetting), DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting), DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting), + DUCKDB_GLOBAL(PasswordSetting), DUCKDB_LOCAL(PerfectHashThresholdSetting), DUCKDB_LOCAL(PreserveIdentifierCase), DUCKDB_GLOBAL(PreserveInsertionOrder), @@ -147022,6 +147492,8 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting DUCKDB_LOCAL(SearchPathSetting), DUCKDB_GLOBAL(TempDirectorySetting), DUCKDB_GLOBAL(ThreadsSetting), + DUCKDB_GLOBAL(UsernameSetting), + DUCKDB_GLOBAL_ALIAS("user", UsernameSetting), DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting), DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting), FINAL_SETTING}; @@ -148148,12 +148620,19 @@ class DBInstanceCache { //! Creates and caches a new DB Instance (Fails if a cached instance already exists) shared_ptr CreateInstance(const string &database, DBConfig &config_dict, bool cache_instance = true); + //! Returns the cached instance for the given database if one exists, otherwise creates and caches a new one + shared_ptr GetOrCreateInstance(const string &database, DBConfig &config_dict, bool cache_instance); + private: //! A map with the cached instances unordered_map> db_instances; //! Lock to alter cache mutex cache_lock; + +private: + shared_ptr GetInstanceInternal(const string &database, const DBConfig &config_dict); + shared_ptr CreateInstanceInternal(const string &database, DBConfig &config_dict, bool cache_instance); }; } // namespace duckdb @@ -148174,8 +148653,7 @@ string GetDBAbsolutePath(const string &database) { return FileSystem::JoinPath(FileSystem::GetWorkingDirectory(), database); } -shared_ptr DBInstanceCache::GetInstance(const string &database, const DBConfig &config) { - lock_guard l(cache_lock); +shared_ptr DBInstanceCache::GetInstanceInternal(const string &database, const DBConfig &config) { shared_ptr db_instance; auto abs_database_path = GetDBAbsolutePath(database); if (db_instances.find(abs_database_path) != db_instances.end()) { @@ -148194,8 +148672,13 @@ shared_ptr DBInstanceCache::GetInstance(const string &database, const DB return db_instance; } -shared_ptr DBInstanceCache::CreateInstance(const string &database, DBConfig &config, bool cache_instance) { +shared_ptr DBInstanceCache::GetInstance(const string &database, const DBConfig &config) { lock_guard l(cache_lock); + return GetInstanceInternal(database, config); +} + +shared_ptr DBInstanceCache::CreateInstanceInternal(const string &database, DBConfig &config, + bool cache_instance) { auto abs_database_path = GetDBAbsolutePath(database); if (db_instances.find(abs_database_path) != db_instances.end()) { throw duckdb::Exception(ExceptionType::CONNECTION, @@ -148213,6 +148696,23 @@ shared_ptr DBInstanceCache::CreateInstance(const string &database, DBCon return db_instance; } +shared_ptr DBInstanceCache::CreateInstance(const string &database, DBConfig &config, bool cache_instance) { + lock_guard l(cache_lock); + return CreateInstanceInternal(database, config, cache_instance); +} + +shared_ptr DBInstanceCache::GetOrCreateInstance(const string &database, DBConfig &config_dict, + bool cache_instance)
{ + lock_guard l(cache_lock); + if (cache_instance) { + auto instance = GetInstanceInternal(database, config_dict); + if (instance) { + return instance; + } + } + return CreateInstanceInternal(database, config_dict, cache_instance); +} + } // namespace duckdb @@ -161779,6 +162279,17 @@ Value MaximumMemorySetting::GetSetting(ClientContext &context) { return Value(StringUtil::BytesToHumanReadableString(config.options.maximum_memory)); } +//===--------------------------------------------------------------------===// +// Password Setting +//===--------------------------------------------------------------------===// +void PasswordSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { + // nop +} + +Value PasswordSetting::GetSetting(ClientContext &context) { + return Value(); +} + //===--------------------------------------------------------------------===// // Perfect Hash Threshold //===--------------------------------------------------------------------===// @@ -161945,6 +162456,17 @@ Value ThreadsSetting::GetSetting(ClientContext &context) { return Value::BIGINT(config.options.maximum_threads); } +//===--------------------------------------------------------------------===// +// Username Setting +//===--------------------------------------------------------------------===// +void UsernameSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { + // nop +} + +Value UsernameSetting::GetSetting(ClientContext &context) { + return Value(); +} + } // namespace duckdb @@ -163825,7 +164347,8 @@ bool Deliminator::RemoveInequalityCandidate(unique_ptr *plan, u } parent_expr = make_unique(parent_expr->alias, parent_expr->return_type, it->first); - parent_cond.comparison = child_cond.comparison; + parent_cond.comparison = + parent_delim_get_side == 0 ? 
child_cond.comparison : FlipComparisionExpression(child_cond.comparison); break; } } @@ -164284,6 +164807,9 @@ idx_t FilterCombiner::GetEquivalenceSet(Expression *expr) { FilterResult FilterCombiner::AddConstantComparison(vector &info_list, ExpressionValueInformation info) { + if (info.constant.IsNull()) { + return FilterResult::UNSATISFIABLE; + } for (idx_t i = 0; i < info_list.size(); i++) { auto comparison = CompareValueInformation(info_list[i], info); switch (comparison) { @@ -165546,7 +166072,6 @@ unique_ptr FilterPullup::PullupJoin(unique_ptr case JoinType::LEFT: case JoinType::ANTI: case JoinType::SEMI: { - can_add_column = true; return PullupFromLeft(move(op)); } default: @@ -165749,7 +166274,7 @@ unique_ptr FilterPushdown::PushdownJoin(unique_ptrfilter)); - D_ASSERT(result == FilterResult::SUCCESS); + D_ASSERT(result != FilterResult::UNSUPPORTED); (void)result; } filters.clear(); @@ -167946,6 +168471,7 @@ class TopN { + namespace duckdb { Optimizer::Optimizer(Binder &binder, ClientContext &context) : context(context), binder(binder), rewriter(context) { @@ -167982,9 +168508,18 @@ void Optimizer::RunOptimizer(OptimizerType type, const std::function &ca profiler.StartPhase(OptimizerTypeToString(type)); callback(); profiler.EndPhase(); + if (plan) { + Verify(*plan); + } +} + +void Optimizer::Verify(LogicalOperator &op) { + ColumnBindingResolver::Verify(op); } -unique_ptr Optimizer::Optimize(unique_ptr plan) { +unique_ptr Optimizer::Optimize(unique_ptr plan_p) { + Verify(*plan_p); + this->plan = move(plan_p); // first we perform expression rewrites using the ExpressionRewriter // this does not change the logical plan structure, but only simplifies the expression trees RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); }); @@ -168071,7 +168606,7 @@ unique_ptr Optimizer::Optimize(unique_ptr plan Planner::VerifyPlan(context, plan); - return plan; + return move(plan); } } // namespace duckdb @@ -168084,6 +168619,8 @@ unique_ptr FilterPullup::PullupBothSide(unique_ptrchildren[0] = left_pullup.Rewrite(move(op->children[0])); op->children[1] = right_pullup.Rewrite(move(op->children[1])); + D_ASSERT(left_pullup.can_add_column == can_add_column); + D_ASSERT(right_pullup.can_add_column == can_add_column); // merging filter expressions for (idx_t i = 0; i < right_pullup.filters_expr_pullup.size(); ++i) { @@ -168108,7 +168645,8 @@ namespace duckdb { unique_ptr FilterPullup::PullupFilter(unique_ptr op) { D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER); - if (can_pullup) { + auto &filter = (LogicalFilter &)*op; + if (can_pullup && filter.projection_map.empty()) { unique_ptr child = move(op->children[0]); child = Rewrite(move(child)); // moving filter's expressions @@ -168415,6 +168953,9 @@ using Filter = FilterPushdown::Filter; unique_ptr FilterPushdown::PushdownFilter(unique_ptr op) { D_ASSERT(op->type == LogicalOperatorType::LOGICAL_FILTER); auto &filter = (LogicalFilter &)*op; + if (!filter.projection_map.empty()) { + return FinishPushdown(move(op)); + } // filter: gather the filters and remove the filter from the set of operations for (auto &expression : filter.expressions) { if (AddFilter(move(expression)) == FilterResult::UNSATISFIABLE) { @@ -168718,8 +169259,8 @@ unique_ptr FilterPushdown::PushdownMarkJoin(unique_ptr FilterPushdown::PushdownMarkJoin(unique_ptrfilter->type == ExpressionType::BOUND_COLUMN_REF) { // filter just references the marker: turn into semi join +#ifdef DEBUG + simplified_mark_join = true; +#endif join.join_type = 
JoinType::SEMI; filters.erase(filters.begin() + i); i--; @@ -168762,6 +169304,9 @@ unique_ptr FilterPushdown::PushdownMarkJoin(unique_ptr FilterPushdown::PushdownSetOperation(unique_ptrchildren.size() == 2); auto left_bindings = op->children[0]->GetColumnBindings(); auto right_bindings = op->children[1]->GetColumnBindings(); + if (left_bindings.size() != right_bindings.size()) { + throw InternalException("Filter pushdown - set operation LHS and RHS have incompatible counts"); + } // pushdown into set operation, we can duplicate the condition and pushdown the expressions into both sides FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer); @@ -169521,7 +170069,8 @@ unique_ptr ComparisonSimplificationRule::Apply(LogicalOperator &op, } // Is the constant cast invertible? - if (!BoundCastExpression::CastIsInvertible(cast_expression->return_type, target_type)) { + if (!cast_constant.IsNull() && + !BoundCastExpression::CastIsInvertible(cast_expression->return_type, target_type)) { // Is it actually invertible? Value uncast_constant; if (!cast_constant.DefaultTryCastAs(constant_value.type(), uncast_constant, &error_message, true) || @@ -193330,6 +193879,7 @@ bool Transformer::TransformGroupBy(duckdb_libpgquery::PGList *group, SelectNode + namespace duckdb { bool Transformer::TransformOrderBy(duckdb_libpgquery::PGList *order, vector &result) { @@ -193363,6 +193913,13 @@ bool Transformer::TransformOrderBy(duckdb_libpgquery::PGList *order, vectorGetExpressionClass() == ExpressionClass::STAR) { + auto &star_expr = (StarExpression &)*order_expression; + D_ASSERT(star_expr.relation_name.empty()); + if (star_expr.columns) { + throw ParserException("COLUMNS expr is not supported in ORDER BY"); + } + } result.emplace_back(type, null_order, move(order_expression)); } else { throw NotImplementedException("ORDER BY list member type %d\n", temp->type); @@ -198917,13 +199474,14 @@ class GroupBinder : public ExpressionBinder { + namespace duckdb { //! 
The HAVING binder is responsible for binding an expression within the HAVING clause of a SQL statement class HavingBinder : public SelectBinder { public: HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info, - case_insensitive_map_t &alias_map); + case_insensitive_map_t &alias_map, AggregateHandling aggregate_handling); protected: BindResult BindExpression(unique_ptr *expr_ptr, idx_t depth, @@ -198933,6 +199491,7 @@ class HavingBinder : public SelectBinder { BindResult BindColumnRef(unique_ptr *expr_ptr, idx_t depth, bool root_expression); ColumnAliasBinder column_alias_binder; + AggregateHandling aggregate_handling; }; } // namespace duckdb @@ -199063,6 +199622,9 @@ unique_ptr Binder::BindDelimiter(ClientContext &context, OrderBinder delimiter_value = ExpressionExecutor::EvaluateScalar(context, *expr).CastAs(context, type); return nullptr; } + if (!new_binder->correlated_columns.empty()) { + throw BinderException("Correlated columns not supported in LIMIT/OFFSET"); + } // move any correlated columns to this binder MoveCorrelatedExpressions(*new_binder); return expr; @@ -199428,16 +199990,22 @@ unique_ptr Binder::BindNode(SelectNode &statement) { // bind the HAVING clause, if any if (statement.having) { - HavingBinder having_binder(*this, context, *result, info, alias_map); + HavingBinder having_binder(*this, context, *result, info, alias_map, statement.aggregate_handling); ExpressionBinder::QualifyColumnNames(*this, statement.having); result->having = having_binder.Bind(statement.having); } // bind the QUALIFY clause, if any if (statement.qualify) { + if (statement.aggregate_handling == AggregateHandling::FORCE_AGGREGATES) { + throw BinderException("Combining QUALIFY with GROUP BY ALL is not supported yet"); + } QualifyBinder qualify_binder(*this, context, *result, info, alias_map); ExpressionBinder::QualifyColumnNames(*this, statement.qualify); result->qualify = qualify_binder.Bind(statement.qualify); + if (qualify_binder.HasBoundColumns() && qualify_binder.BoundAggregates()) { + throw BinderException("Cannot mix aggregates with non-aggregated columns!"); + } } // after that, we bind to the SELECT list @@ -200640,7 +201208,7 @@ unique_ptr Binder::PlanSubquery(BoundSubqueryExpression &expr, uniqu D_ASSERT(root); // first we translate the QueryNode of the subquery into a logical plan // note that we do not plan nested subqueries yet - auto sub_binder = Binder::CreateBinder(context); + auto sub_binder = Binder::CreateBinder(context, this); sub_binder->plan_subquery = false; auto subquery_root = sub_binder->CreatePlan(*expr.subquery); D_ASSERT(subquery_root); @@ -200856,8 +201424,8 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt) { auto function_data = copy_function->function.copy_from_bind(context, *stmt.info, expected_names, bound_insert.expected_types); - auto get = make_unique(0, copy_function->function.copy_from_function, move(function_data), - bound_insert.expected_types, expected_names); + auto get = make_unique(GenerateTableIndex(), copy_function->function.copy_from_function, + move(function_data), bound_insert.expected_types, expected_names); for (idx_t i = 0; i < bound_insert.expected_types.size(); i++) { get->column_ids.push_back(i); } @@ -201536,6 +202104,7 @@ class CheckBinder : public ExpressionBinder { + #include namespace duckdb { @@ -201736,6 +202305,31 @@ void Binder::BindDefaultValues(ColumnList &columns, vector &dependencies) { + if (expr.type == ExpressionType::BOUND_FUNCTION) { + auto &function = 
(BoundFunctionExpression &)expr; + if (function.function.dependency) { + function.function.dependency(function, dependencies); + } + } + ExpressionIterator::EnumerateChildren( + expr, [&](Expression &child) { ExtractExpressionDependencies(child, dependencies); }); +} + +static void ExtractDependencies(BoundCreateTableInfo &info) { + for (auto &default_value : info.bound_defaults) { + if (default_value) { + ExtractExpressionDependencies(*default_value, info.dependencies); + } + } + for (auto &constraint : info.bound_constraints) { + if (constraint->type == ConstraintType::CHECK) { + auto &bound_check = (BoundCheckConstraint &)*constraint; + ExtractExpressionDependencies(*bound_check.expression, info.dependencies); + } + } +} + unique_ptr Binder::BindCreateTableInfo(unique_ptr info) { auto &base = (CreateTableInfo &)*info; @@ -201766,6 +202360,8 @@ unique_ptr Binder::BindCreateTableInfo(unique_ptrbound_defaults); } + // extract dependencies from any default values or CHECK constraints + ExtractDependencies(*result); if (base.columns.PhysicalColumnCount() == 0) { throw BinderException("Creating a table without physical (non-generated) columns is not supported"); @@ -201859,7 +202455,8 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) { unique_ptr child_operator; for (auto &using_clause : stmt.using_clauses) { // bind the using clause - auto bound_node = Bind(*using_clause); + auto using_binder = Binder::CreateBinder(context, this); + auto bound_node = using_binder->Bind(*using_clause); auto op = CreatePlan(*bound_node); if (child_operator) { // already bound a child: create a cross product to unify the two @@ -201867,6 +202464,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) { } else { child_operator = move(op); } + bind_context.AddContext(move(using_binder->bind_context)); } if (child_operator) { root = LogicalCrossProduct::Create(move(root), move(child_operator)); @@ -201885,7 +202483,7 @@ BoundStatement Binder::Bind(DeleteStatement &stmt) { root = move(filter); } // create the delete node - auto del = make_unique(table); + auto del = make_unique(table, GenerateTableIndex()); del->AddChild(move(root)); // set up the delete expression @@ -202013,6 +202611,7 @@ BoundStatement Binder::Bind(ExecuteStatement &stmt) { prepared = prepared_planner.PrepareSQLStatement(entry->second->unbound_statement->Copy()); rebound_plan = move(prepared_planner.plan); D_ASSERT(prepared->properties.bound_all_parameters); + this->bound_tables = prepared_planner.binder->bound_tables; } // copy the properties of the prepared statement into the planner this->properties = prepared->properties; @@ -202234,7 +202833,7 @@ BoundStatement Binder::Bind(ExportStatement &stmt) { CopyStatement copy_stmt; copy_stmt.info = move(info); - auto copy_binder = Binder::CreateBinder(context); + auto copy_binder = Binder::CreateBinder(context, this); auto bound_statement = copy_binder->Bind(copy_stmt); if (child_operator) { // use UNION ALL to combine the individual copy statements into a single node @@ -202364,7 +202963,7 @@ BoundStatement Binder::Bind(InsertStatement &stmt) { properties.read_only = false; } - auto insert = make_unique(table); + auto insert = make_unique(table, GenerateTableIndex()); // Add CTEs as bindable AddCTEMap(stmt.cte_map); @@ -202568,6 +203167,7 @@ namespace duckdb { BoundStatement Binder::Bind(PrepareStatement &stmt) { Planner prepared_planner(context); auto prepared_data = prepared_planner.PrepareSQLStatement(move(stmt.statement)); + this->bound_tables = prepared_planner.binder->bound_tables; auto 
prepare = make_unique(stmt.name, move(prepared_data), move(prepared_planner.plan)); // we can prepare in read-only mode: prepared statements are not written to the catalog @@ -202694,7 +203294,7 @@ BoundStatement Binder::Bind(AlterStatement &stmt) { BoundStatement Binder::Bind(TransactionStatement &stmt) { // transaction statements do not require a valid transaction - properties.requires_valid_transaction = false; + properties.requires_valid_transaction = stmt.info->type == TransactionType::BEGIN_TRANSACTION; BoundStatement result; result.names = {"Success"}; @@ -202988,6 +203588,13 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log BindExtraColumns(table, get, proj, update, check.bound_columns); } } + if (update.return_chunk) { + physical_index_set_t all_columns; + for (idx_t i = 0; i < table.storage->column_definitions.size(); i++) { + all_columns.insert(PhysicalIndex(i)); + } + BindExtraColumns(table, get, proj, update, all_columns); + } // for index updates we always turn any update into an insert and a delete // we thus need all the columns to be available, hence we check if the update touches any index columns // If the returning keyword is used, we need access to the whole row in case the user requests it. @@ -203010,7 +203617,7 @@ static void BindUpdateConstraints(TableCatalogEntry &table, LogicalGet &get, Log } } - if (update.update_is_del_and_insert || update.return_chunk) { + if (update.update_is_del_and_insert) { // the update updates a column required by an index or requires returning the updated rows, // push projections for all columns physical_index_set_t all_columns; @@ -203121,16 +203728,15 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) { // set the projection as child of the update node and finalize the result update->AddChild(move(proj)); + auto update_table_index = GenerateTableIndex(); + update->table_index = update_table_index; if (!stmt.returning_list.empty()) { - auto update_table_index = GenerateTableIndex(); - update->table_index = update_table_index; unique_ptr update_as_logicaloperator = move(update); return BindReturning(move(stmt.returning_list), table, update_table_index, move(update_as_logicaloperator), move(result)); } - update->table_index = 0; result.names = {"Count"}; result.types = {LogicalType::BIGINT}; result.plan = move(update); @@ -203456,6 +204062,9 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { // bind the child subquery view_binder->AddBoundView(view_catalog_entry); auto bound_child = view_binder->Bind(subquery); + if (!view_binder->correlated_columns.empty()) { + throw BinderException("Contents of view were altered - view bound correlated columns"); + } D_ASSERT(bound_child->type == TableReferenceType::SUBQUERY); // verify that the types and names match up with the expected types and names @@ -203967,6 +204576,33 @@ unique_ptr Binder::Bind(SubqueryRef &ref, CommonTableExpressionIn +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/planner/expression_binder/table_function_binder.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +//! The Table function binder can bind standard table function parameters (i.e. 
non-table-in-out functions) +class TableFunctionBinder : public ExpressionBinder { +public: + TableFunctionBinder(Binder &binder, ClientContext &context); + +protected: + BindResult BindColumnReference(ColumnRefExpression &expr); + BindResult BindExpression(unique_ptr *expr, idx_t depth, bool root_expression = false) override; + + string UnsupportedAggregateMessage() override; +}; + +} // namespace duckdb @@ -204043,17 +204679,17 @@ bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_functi continue; } - ConstantBinder binder(*this, context, "TABLE FUNCTION parameter"); + TableFunctionBinder binder(*this, context); LogicalType sql_type; auto expr = binder.Bind(child, &sql_type); if (expr->HasParameter()) { throw ParameterNotResolvedException(); } - if (!expr->IsFoldable()) { + if (!expr->IsScalar()) { error = "Table function requires a constant parameter"; return false; } - auto constant = ExpressionExecutor::EvaluateScalar(context, *expr); + auto constant = ExpressionExecutor::EvaluateScalar(context, *expr, true); if (parameter_name.empty()) { // unnamed parameter if (!named_parameters.empty()) { @@ -207006,8 +207642,9 @@ BindResult GroupBinder::BindColumnRef(ColumnRefExpression &colref) { namespace duckdb { HavingBinder::HavingBinder(Binder &binder, ClientContext &context, BoundSelectNode &node, BoundGroupInformation &info, - case_insensitive_map_t &alias_map) - : SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map) { + case_insensitive_map_t &alias_map, AggregateHandling aggregate_handling) + : SelectBinder(binder, context, node, info), column_alias_binder(node, alias_map), + aggregate_handling(aggregate_handling) { target_type = LogicalType(LogicalTypeId::BOOLEAN); } @@ -207017,7 +207654,16 @@ BindResult HavingBinder::BindColumnRef(unique_ptr *expr_ptr, i if (!alias_result.HasError()) { return alias_result; } - + if (aggregate_handling == AggregateHandling::FORCE_AGGREGATES) { + auto expr = duckdb::SelectBinder::BindExpression(expr_ptr, depth); + if (expr.HasError()) { + return expr; + } + auto group_ref = make_unique( + expr.expression->return_type, ColumnBinding(node.group_index, node.groups.group_expressions.size())); + node.groups.group_expressions.push_back(move(expr.expression)); + return BindResult(move(group_ref)); + } return BindResult(StringUtil::Format( "column %s must appear in the GROUP BY clause or be used in an aggregate function", expr.ToString())); } @@ -207499,6 +208145,42 @@ BindResult SelectBinder::BindGroup(ParsedExpression &expr, idx_t depth, idx_t gr } // namespace duckdb + + +namespace duckdb { + +TableFunctionBinder::TableFunctionBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) { +} + +BindResult TableFunctionBinder::BindColumnReference(ColumnRefExpression &expr) { + auto result_name = StringUtil::Join(expr.column_names, "."); + return BindResult(make_unique(Value(result_name))); +} + +BindResult TableFunctionBinder::BindExpression(unique_ptr *expr_ptr, idx_t depth, + bool root_expression) { + auto &expr = **expr_ptr; + switch (expr.GetExpressionClass()) { + case ExpressionClass::COLUMN_REF: + return BindColumnReference((ColumnRefExpression &)expr); + case ExpressionClass::SUBQUERY: + throw BinderException("Table function cannot contain subqueries"); + case ExpressionClass::DEFAULT: + return BindResult("Table function cannot contain DEFAULT clause"); + case ExpressionClass::WINDOW: + return BindResult("Table function cannot contain window functions!"); + default: + return 
ExpressionBinder::BindExpression(expr_ptr, depth); + } +} + +string TableFunctionBinder::UnsupportedAggregateMessage() { + return "Table function cannot contain aggregates!"; +} + +} // namespace duckdb + + namespace duckdb { UpdateBinder::UpdateBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) { @@ -208433,6 +209115,7 @@ JoinSide JoinSide::GetJoinSide(const unordered_set &bindings, unordered_s + namespace duckdb { const uint64_t PLAN_SERIALIZATION_VERSION = 1; @@ -208764,7 +209447,8 @@ unique_ptr LogicalOperator::Deserialize(Deserializer &deseriali result = LogicalSimple::Deserialize(state, reader); break; case LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR: - throw SerializationException("Invalid type for operator deserialization"); + result = LogicalExtensionOperator::Deserialize(state, reader); + break; case LogicalOperatorType::LOGICAL_INVALID: /* no default here to trigger a warning if we forget to implement deserialize for a new operator */ throw SerializationException("Invalid type for operator deserialization"); @@ -208776,6 +209460,10 @@ unique_ptr LogicalOperator::Deserialize(Deserializer &deseriali return result; } +vector LogicalOperator::GetTableIndex() const { + return vector {}; +} + unique_ptr LogicalOperator::Copy(ClientContext &context) const { BufferedSerializer logical_op_serializer; try { @@ -209167,6 +209855,14 @@ idx_t LogicalAggregate::EstimateCardinality(ClientContext &context) { return LogicalOperator::EstimateCardinality(context); } +vector LogicalAggregate::GetTableIndex() const { + vector result {group_index, aggregate_index}; + if (groupings_index != DConstants::INVALID_INDEX) { + result.push_back(groupings_index); + } + return result; +} + } // namespace duckdb @@ -209233,6 +209929,10 @@ unique_ptr LogicalColumnDataGet::Deserialize(LogicalDeserializa return make_unique(table_index, move(chunk_types), move(collection)); } +vector LogicalColumnDataGet::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -209497,6 +210197,10 @@ unique_ptr LogicalCTERef::Deserialize(LogicalDeserializationSta return make_unique(table_index, cte_index, chunk_types, bound_columns); } +vector LogicalCTERef::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -209517,8 +210221,8 @@ unique_ptr LogicalDelete::Deserialize(LogicalDeserializationSta TableCatalogEntry *table_catalog_entry = catalog.GetEntry(context, info->schema, info->table); - auto result = make_unique(table_catalog_entry); - result->table_index = reader.ReadRequired(); + auto table_index = reader.ReadRequired(); + auto result = make_unique(table_catalog_entry, table_index); result->return_chunk = reader.ReadRequired(); return move(result); } @@ -209527,6 +210231,10 @@ idx_t LogicalDelete::EstimateCardinality(ClientContext &context) { return return_chunk ? 
LogicalOperator::EstimateCardinality(context) : 1; } +vector LogicalDelete::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -209544,6 +210252,10 @@ unique_ptr LogicalDelimGet::Deserialize(LogicalDeserializationS return make_unique(table_index, chunk_types); } +vector LogicalDelimGet::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -209611,6 +210323,10 @@ unique_ptr LogicalDummyScan::Deserialize(LogicalDeserialization return make_unique(table_index); } +vector LogicalDummyScan::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -209719,6 +210435,28 @@ unique_ptr LogicalExpressionGet::Deserialize(LogicalDeserializa return make_unique(table_index, expr_types, move(expressions)); } +vector LogicalExpressionGet::GetTableIndex() const { + return vector {table_index}; +} + +} // namespace duckdb + + + +namespace duckdb { +unique_ptr LogicalExtensionOperator::Deserialize(LogicalDeserializationState &state, + FieldReader &reader) { + auto &config = DBConfig::GetConfig(state.gstate.context); + + auto extension_name = reader.ReadRequired(); + for (auto &extension : config.operator_extensions) { + if (extension->GetName() == extension_name) { + return extension->Deserialize(state, reader); + } + } + + throw SerializationException("No serialization method exists for extension: " + extension_name); +} } // namespace duckdb @@ -209956,6 +210694,10 @@ unique_ptr LogicalGet::Deserialize(LogicalDeserializationState return move(result); } +vector LogicalGet::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -210001,10 +210743,9 @@ unique_ptr LogicalInsert::Deserialize(LogicalDeserializationSta throw InternalException("Cant find catalog entry for table %s", info->table); } - auto result = make_unique(table_catalog_entry); + auto result = make_unique(table_catalog_entry, table_index); result->type = state.type; result->table = table_catalog_entry; - result->table_index = table_index; result->return_chunk = return_chunk; result->insert_values = move(insert_values); result->column_index_map = column_index_map; @@ -210017,6 +210758,10 @@ idx_t LogicalInsert::EstimateCardinality(ClientContext &context) { return return_chunk ? 
LogicalOperator::EstimateCardinality(context) : 1; } +vector LogicalInsert::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -210253,6 +210998,10 @@ unique_ptr LogicalProjection::Deserialize(LogicalDeserializatio return make_unique(table_index, move(expressions)); } +vector LogicalProjection::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -210273,6 +211022,10 @@ unique_ptr LogicalRecursiveCTE::Deserialize(LogicalDeserializat return unique_ptr(new LogicalRecursiveCTE(table_index, column_count, union_all, state.type)); } +vector LogicalRecursiveCTE::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -210291,7 +211044,12 @@ vector LogicalSample::GetColumnBindings() { idx_t LogicalSample::EstimateCardinality(ClientContext &context) { auto child_cardinality = children[0]->EstimateCardinality(context); if (sample_options->is_percentage) { - return idx_t(child_cardinality * sample_options->sample_size.GetValue()); + double sample_cardinality = + double(child_cardinality) * (sample_options->sample_size.GetValue() / 100.0); + if (sample_cardinality > double(child_cardinality)) { + return child_cardinality; + } + return idx_t(sample_cardinality); } else { auto sample_size = sample_options->sample_size.GetValue(); if (sample_size < child_cardinality) { @@ -210355,6 +211113,11 @@ unique_ptr LogicalSetOperation::Deserialize(LogicalDeserializat // TODO(stephwang): review if unique_ptr plan is needed return unique_ptr(new LogicalSetOperation(table_index, column_count, state.type)); } + +vector LogicalSetOperation::GetTableIndex() const { + return vector {table_index}; +} + } // namespace duckdb @@ -210453,6 +211216,11 @@ unique_ptr LogicalUnnest::Deserialize(LogicalDeserializationSta result->expressions = move(expressions); return move(result); } + +vector LogicalUnnest::GetTableIndex() const { + return vector {unnest_index}; +} + } // namespace duckdb @@ -210527,6 +211295,10 @@ unique_ptr LogicalWindow::Deserialize(LogicalDeserializationSta return move(result); } +vector LogicalWindow::GetTableIndex() const { + return vector {window_index}; +} + } // namespace duckdb @@ -210659,7 +211431,7 @@ void Planner::CreatePlan(SQLStatement &statement) { this->plan = nullptr; for (auto &extension_op : config.operator_extensions) { auto bound_statement = - extension_op.Bind(context, *this->binder, extension_op.operator_info.get(), statement); + extension_op->Bind(context, *this->binder, extension_op->operator_info.get(), statement); if (bound_statement.plan != nullptr) { this->names = bound_statement.names; this->types = bound_statement.types; @@ -211058,10 +211830,13 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal // we reached a node without correlated expressions // we can eliminate the dependent join now and create a simple cross product // now create the duplicate eliminated scan for this node + auto left_columns = plan->GetColumnBindings().size(); auto delim_index = binder.GenerateTableIndex(); this->base_binding = ColumnBinding(delim_index, 0); + this->delim_offset = 0; + this->data_offset = left_columns; auto delim_scan = make_unique(delim_index, delim_types); - return LogicalCrossProduct::Create(move(delim_scan), move(plan)); + return LogicalCrossProduct::Create(move(plan), move(delim_scan)); } switch (plan->type) { case LogicalOperatorType::LOGICAL_UNNEST: @@ -211425,8 +212200,19 @@ unique_ptr FlattenDependentJoins::PushDownDependentJoinInternal case 
LogicalOperatorType::LOGICAL_UNION: { auto &setop = (LogicalSetOperation &)*plan; // set operator, push into both children +#ifdef DEBUG + plan->children[0]->ResolveOperatorTypes(); + plan->children[1]->ResolveOperatorTypes(); + D_ASSERT(plan->children[0]->types == plan->children[1]->types); +#endif plan->children[0] = PushDownDependentJoin(move(plan->children[0])); plan->children[1] = PushDownDependentJoin(move(plan->children[1])); +#ifdef DEBUG + D_ASSERT(plan->children[0]->GetColumnBindings().size() == plan->children[1]->GetColumnBindings().size()); + plan->children[0]->ResolveOperatorTypes(); + plan->children[1]->ResolveOperatorTypes(); + D_ASSERT(plan->children[0]->types == plan->children[1]->types); +#endif // we have to refer to the setop index now base_binding.table_index = setop.table_index; base_binding.column_index = setop.column_count; @@ -212334,6 +213120,7 @@ BlockHandle::~BlockHandle() { } else { D_ASSERT(memory_charge.size == 0); } + buffer_manager.PurgeQueue(); block_manager.UnregisterBlock(block_id, can_destroy); } @@ -212360,7 +213147,7 @@ unique_ptr BufferManager::ConstructManagedBuffer(idx_t size, unique_ FileBufferType type) { if (source) { auto tmp = move(source); - D_ASSERT(tmp->size == size); + D_ASSERT(tmp->AllocSize() == BufferManager::GetAllocSize(size)); return make_unique(*tmp, type); } else { // no re-usable buffer: allocate a new buffer @@ -212495,7 +213282,7 @@ void BufferManager::SetTemporaryDirectory(string new_dir) { BufferManager::BufferManager(DatabaseInstance &db, string tmp, idx_t maximum_memory) : db(db), current_memory(0), maximum_memory(maximum_memory), temp_directory(move(tmp)), - queue(make_unique()), temporary_id(MAXIMUM_BLOCK), + queue(make_unique()), temporary_id(MAXIMUM_BLOCK), queue_insertions(0), buffer_allocator(BufferAllocatorAllocate, BufferAllocatorFree, BufferAllocatorRealloc, make_unique(*this)) { temp_block_manager = make_unique(*this); @@ -212571,6 +213358,7 @@ TempBufferPoolReservation BufferManager::EvictBlocksOrThrow(idx_t memory_delta, } shared_ptr BufferManager::RegisterSmallMemory(idx_t block_size) { + D_ASSERT(block_size < Storage::BLOCK_SIZE); auto res = EvictBlocksOrThrow(block_size, maximum_memory, nullptr, "could not allocate block of %lld bytes (%lld/%lld used) %s", block_size, GetUsedMemory(), GetMaxMemory()); @@ -212583,7 +213371,7 @@ shared_ptr BufferManager::RegisterSmallMemory(idx_t block_size) { shared_ptr BufferManager::RegisterMemory(idx_t block_size, bool can_destroy) { D_ASSERT(block_size >= Storage::BLOCK_SIZE); - auto alloc_size = AlignValue(block_size + Storage::BLOCK_HEADER_SIZE); + auto alloc_size = GetAllocSize(block_size); // first evict blocks until we have enough memory to store this buffer unique_ptr reusable_buffer; auto res = EvictBlocksOrThrow(alloc_size, maximum_memory, &reusable_buffer, @@ -212597,9 +213385,11 @@ shared_ptr BufferManager::RegisterMemory(idx_t block_size, bool can move(res)); } -BufferHandle BufferManager::Allocate(idx_t block_size) { - auto block = RegisterMemory(block_size, true); - return Pin(block); +BufferHandle BufferManager::Allocate(idx_t block_size, bool can_destroy, shared_ptr *block) { + shared_ptr local_block; + auto block_ptr = block ? 
block : &local_block; + *block_ptr = RegisterMemory(block_size, can_destroy); + return Pin(*block_ptr); } void BufferManager::ReAllocate(shared_ptr &handle, idx_t block_size) { @@ -212629,6 +213419,7 @@ void BufferManager::ReAllocate(shared_ptr &handle, idx_t block_size // resize and adjust current memory handle->buffer->Resize(block_size); handle->memory_usage += memory_delta; + D_ASSERT(handle->memory_usage == handle->buffer->AllocSize()); } BufferHandle BufferManager::Pin(shared_ptr &handle) { @@ -212669,6 +213460,7 @@ BufferHandle BufferManager::Pin(shared_ptr &handle) { handle->memory_usage += delta; handle->memory_charge.Resize(current_memory, handle->memory_usage); } + D_ASSERT(handle->memory_usage == handle->buffer->AllocSize()); return buf; } @@ -212961,7 +213753,9 @@ class TemporaryFileHandle { // as a result we can truncate the file auto max_index = index_manager.GetMaxIndex(); auto &fs = FileSystem::GetFileSystem(db); +#ifndef WIN32 // this ended up causing issues when sorting fs.Truncate(*handle, GetPositionInFile(max_index + 1)); +#endif } } @@ -213237,6 +214031,9 @@ void BufferManager::BufferAllocatorFree(PrivateAllocatorData *private_data, data data_ptr_t BufferManager::BufferAllocatorRealloc(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t old_size, idx_t size) { + if (old_size == size) { + return pointer; + } auto &data = (BufferAllocatorData &)*private_data; BufferPoolReservation r; r.size = old_size; @@ -213250,6 +214047,10 @@ Allocator &BufferAllocator::Get(ClientContext &context) { return manager.GetBufferAllocator(); } +Allocator &BufferAllocator::Get(DatabaseInstance &db) { + return BufferManager::GetBufferManager(db).GetBufferAllocator(); +} + Allocator &BufferManager::GetBufferAllocator() { return buffer_allocator; } @@ -218819,11 +219620,15 @@ class DictionaryCompressionState : public CompressionState { new_string = !LookupString(data[idx]); } - bool fits = HasEnoughSpace(new_string, string_size); + bool fits = CalculateSpaceRequirements(new_string, string_size); if (!fits) { Flush(); new_string = true; - D_ASSERT(HasEnoughSpace(new_string, string_size)); + + fits = CalculateSpaceRequirements(new_string, string_size); + if (!fits) { + throw InternalException("Dictionary compression could not write to new segment"); + } } if (!row_is_valid) { @@ -218851,8 +219656,8 @@ class DictionaryCompressionState : public CompressionState { virtual void AddNewString(string_t str) = 0; // Add a null value to the compression state virtual void AddNull() = 0; - // Check if we have enough space to add a string - virtual bool HasEnoughSpace(bool new_string, size_t string_size) = 0; + // Needs to be called before adding a value. Will return false if a flush is required first. + virtual bool CalculateSpaceRequirements(bool new_string, size_t string_size) = 0; // Flush the segment to disk if compressing or reset the counters if analyzing virtual void Flush(bool final = false) = 0; }; @@ -218909,7 +219714,8 @@ struct DictionaryCompressionStorage { // scanning the whole dictionary at once and then scanning the selection buffer for each emitted vector. Secondly, it // allows for efficient bitpacking compression as the selection values should remain relatively small. 
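The layout described in the comment above is easy to see in isolation. What follows is a minimal, self-contained sketch of the same idea, not DuckDB's actual implementation: `ToyDictionaryColumn` and its members are invented names, and `BitWidth` plays the role that `BitpackingPrimitives::MinimumBitWidth` over the dictionary size plays in the real code.

```cpp
// Illustrative sketch only (not DuckDB's API): unique strings go into a
// dictionary, each row stores just a small index, and the per-index bit
// width is the minimum needed for the current dictionary size -- which is
// why the selection values bitpack well.
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct ToyDictionaryColumn {
	std::vector<std::string> dictionary;              // unique values, insertion order
	std::unordered_map<std::string, uint32_t> lookup; // value -> dictionary index
	std::vector<uint32_t> selection;                  // one small index per row

	void Append(const std::string &value) {
		auto it = lookup.find(value);
		if (it == lookup.end()) {
			it = lookup.emplace(value, static_cast<uint32_t>(dictionary.size())).first;
			dictionary.push_back(value);
		}
		selection.push_back(it->second);
	}

	// minimum bits per selection entry for the current dictionary size
	uint32_t BitWidth() const {
		uint32_t max_index = dictionary.empty() ? 0 : static_cast<uint32_t>(dictionary.size()) - 1;
		uint32_t width = 1;
		while (width < 32 && (1u << width) <= max_index) {
			width++;
		}
		return width;
	}
};
```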
struct DictionaryCompressionCompressState : public DictionaryCompressionState { - explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer) : checkpointer(checkpointer) { + explicit DictionaryCompressionCompressState(ColumnDataCheckpointer &checkpointer) + : checkpointer(checkpointer), heap(BufferAllocator::Get(checkpointer.GetDatabase())) { auto &db = checkpointer.GetDatabase(); auto &config = DBConfig::GetConfig(db); function = config.GetCompressionFunction(CompressionType::COMPRESSION_DICTIONARY, PhysicalType::VARCHAR); @@ -219015,7 +219821,7 @@ struct DictionaryCompressionCompressState : public DictionaryCompressionState { current_segment->count++; } - bool HasEnoughSpace(bool new_string, size_t string_size) override { + bool CalculateSpaceRequirements(bool new_string, size_t string_size) override { if (new_string) { next_width = BitpackingPrimitives::MinimumBitWidth(index_buffer.size() - 1 + new_string); return DictionaryCompressionStorage::HasEnoughSpace(current_segment->count.load() + 1, @@ -219136,7 +219942,7 @@ struct DictionaryAnalyzeState : public DictionaryCompressionState { current_tuple_count++; } - bool HasEnoughSpace(bool new_string, size_t string_size) override { + bool CalculateSpaceRequirements(bool new_string, size_t string_size) override { if (new_string) { next_width = BitpackingPrimitives::MinimumBitWidth(current_unique_count + 2); // 1 for null, one for new string @@ -222387,8 +223193,7 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string new_block->offset = 0; new_block->size = alloc_size; // allocate an in-memory buffer for it - block = buffer_manager.RegisterMemory(alloc_size, false); - handle = buffer_manager.Pin(block); + handle = buffer_manager.Allocate(alloc_size, false, &block); state.overflow_blocks[block->BlockId()] = new_block.get(); new_block->block = move(block); new_block->next = move(state.head); @@ -223823,7 +224628,12 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t bool append_failed = false; // now append the entries to the indices indexes.Scan([&](Index &index) { - if (!index.Append(chunk, row_identifiers)) { + try { + if (!index.Append(chunk, row_identifiers)) { + append_failed = true; + return true; + } + } catch (...) 
{ append_failed = true; return true; } @@ -223837,7 +224647,6 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t for (auto *index : already_appended) { index->Delete(chunk, row_identifiers); } - return false; } return true; @@ -224480,12 +225289,21 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta append_state.current_row); } if (constraint_violated) { + PreservedError error; // need to revert the append row_t current_row = append_state.row_start; // remove the data from the indexes, if there are any indexes row_groups->Scan(transaction, [&](DataChunk &chunk) -> bool { // append this chunk to the indexes of the table - table->RemoveFromIndexes(append_state, chunk, current_row); + try { + table->RemoveFromIndexes(append_state, chunk, current_row); + } catch (Exception &ex) { + error = PreservedError(ex); + return false; + } catch (std::exception &ex) { + error = PreservedError(ex); + return false; + } current_row += chunk.size(); if (current_row >= append_state.current_row) { @@ -224497,6 +225315,9 @@ void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendSta if (append_to_table) { table->RevertAppendInternal(append_state.row_start, append_count); } + if (error) { + error.Throw(); + } throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key"); } } @@ -224628,7 +225449,7 @@ void LocalStorage::InitializeAppend(LocalAppendState &state, DataTable *table) { void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) { // append to unique indices (if any) auto storage = state.storage; - idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows(); + idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows() + state.append_state.total_append_count; if (!DataTable::AppendToIndexes(storage->indexes, chunk, base_id)) { throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key"); } @@ -225410,6 +226231,7 @@ block_id_t SingleFileBlockManager::GetFreeBlockId() { void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) { lock_guard lock(block_lock); D_ASSERT(block_id >= 0); + D_ASSERT(block_id < max_block); D_ASSERT(free_list.find(block_id) == free_list.end()); multi_use_blocks.erase(block_id); free_list.insert(block_id); @@ -225418,6 +226240,7 @@ void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) { void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) { lock_guard lock(block_lock); D_ASSERT(block_id >= 0); + D_ASSERT(block_id < max_block); // check if the block is a multi-use block auto entry = multi_use_blocks.find(block_id); @@ -225440,6 +226263,8 @@ void SingleFileBlockManager::MarkBlockAsModified(block_id_t block_id) { void SingleFileBlockManager::IncreaseBlockReferenceCount(block_id_t block_id) { lock_guard lock(block_lock); + D_ASSERT(block_id >= 0); + D_ASSERT(block_id < max_block); D_ASSERT(free_list.find(block_id) == free_list.end()); auto entry = multi_use_blocks.find(block_id); if (entry != multi_use_blocks.end()) { @@ -228521,7 +229346,7 @@ unique_ptr ColumnSegment::CreatePersistentSegment(DatabaseInstanc block = block_manager.RegisterBlock(block_id); } auto segment_size = Storage::BLOCK_SIZE; - return make_unique(db, block, type, ColumnSegmentType::PERSISTENT, start, count, function, + return make_unique(db, move(block), type, ColumnSegmentType::PERSISTENT, start, count, function, move(statistics), block_id, offset, segment_size); } @@ -228535,9 +229360,9 @@ unique_ptr 
ColumnSegment::CreateTransientSegment(DatabaseInstance if (segment_size < Storage::BLOCK_SIZE) { block = buffer_manager.RegisterSmallMemory(segment_size); } else { - block = buffer_manager.RegisterMemory(segment_size, false); + buffer_manager.Allocate(segment_size, false, &block); } - return make_unique(db, block, type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr, + return make_unique(db, move(block), type, ColumnSegmentType::TRANSIENT, start, 0, function, nullptr, INVALID_BLOCK, 0, segment_size); } @@ -228618,9 +229443,9 @@ void ColumnSegment::Resize(idx_t new_size) { D_ASSERT(new_size > this->segment_size); D_ASSERT(offset == 0); auto &buffer_manager = BufferManager::GetBufferManager(db); - auto new_block = buffer_manager.RegisterMemory(Storage::BLOCK_SIZE, false); auto old_handle = buffer_manager.Pin(block); - auto new_handle = buffer_manager.Pin(new_block); + shared_ptr new_block; + auto new_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block); memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size); this->block_id = new_block->BlockId(); this->block = move(new_block); @@ -232068,7 +232893,8 @@ static UpdateSegment::rollback_update_function_t GetRollbackUpdateFunction(Physi static UpdateSegment::statistics_update_function_t GetStatisticsUpdateFunction(PhysicalType type); static UpdateSegment::fetch_row_function_t GetFetchRowFunction(PhysicalType type); -UpdateSegment::UpdateSegment(ColumnData &column_data) : column_data(column_data), stats(column_data.type) { +UpdateSegment::UpdateSegment(ColumnData &column_data) + : column_data(column_data), stats(column_data.type), heap(BufferAllocator::Get(column_data.GetDatabase())) { auto physical_type = column_data.type.InternalType(); this->type_size = GetTypeIdSize(physical_type); @@ -234287,7 +235113,10 @@ void CleanupState::Flush() { Vector row_identifiers(LogicalType::ROW_TYPE, (data_ptr_t)row_numbers); // delete the tuples from all the indexes - current_table->RemoveFromIndexes(row_identifiers, count); + try { + current_table->RemoveFromIndexes(row_identifiers, count); + } catch (...) 
{ + } count = 0; } @@ -238579,626 +239408,628 @@ size_t duckdb_fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* sy Symbol concat(Symbol a, Symbol b) { - Symbol s; - u32 length = a.length()+b.length(); - if (length > Symbol::maxLength) length = Symbol::maxLength; - s.set_code_len(FSST_CODE_MASK, length); - s.val.num = (b.val.num << (8*a.length())) | a.val.num; - return s; + Symbol s; + u32 length = a.length()+b.length(); + if (length > Symbol::maxLength) length = Symbol::maxLength; + s.set_code_len(FSST_CODE_MASK, length); + s.val.num = (b.val.num << (8*a.length())) | a.val.num; + return s; } namespace std { template <> class hash { - public: - size_t operator()(const QSymbol& q) const { - uint64_t k = q.symbol.val.num; - const uint64_t m = 0xc6a4a7935bd1e995; - const int r = 47; - uint64_t h = 0x8445d61a4e774912 ^ (8*m); - k *= m; - k ^= k >> r; - k *= m; - h ^= k; - h *= m; - h ^= h >> r; - h *= m; - h ^= h >> r; - return h; - } +public: + size_t operator()(const QSymbol& q) const { + uint64_t k = q.symbol.val.num; + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + uint64_t h = 0x8445d61a4e774912 ^ (8*m); + k *= m; + k ^= k >> r; + k *= m; + h ^= k; + h *= m; + h ^= h >> r; + h *= m; + h ^= h >> r; + return h; + } }; } bool isEscapeCode(u16 pos) { return pos < FSST_CODE_BASE; } std::ostream& operator<<(std::ostream& out, const Symbol& s) { - for (u32 i=0; i line, size_t len[], bool zeroTerminated=false) { - SymbolTable *st = new SymbolTable(), *bestTable = new SymbolTable(); - int bestGain = (int) -FSST_SAMPLEMAXSZ; // worst case (everything exception) - size_t sampleFrac = 128; - - // start by determining the terminator. We use the (lowest) most infrequent byte as terminator - st->zeroTerminated = zeroTerminated; - if (zeroTerminated) { - st->terminator = 0; // except in case of zeroTerminated mode, then byte 0 is terminator regardless frequency - } else { - u16 byteHisto[256]; - memset(byteHisto, 0, sizeof(byteHisto)); - for(size_t i=0; iterminator = 256; - while(i-- > 0) { - if (byteHisto[i] > minSize) continue; - st->terminator = i; - minSize = byteHisto[i]; - } - } - assert(st->terminator != 256); + SymbolTable *st = new SymbolTable(), *bestTable = new SymbolTable(); + int bestGain = (int) -FSST_SAMPLEMAXSZ; // worst case (everything exception) + size_t sampleFrac = 128; + + // start by determining the terminator. We use the (lowest) most infrequent byte as terminator + st->zeroTerminated = zeroTerminated; + if (zeroTerminated) { + st->terminator = 0; // except in case of zeroTerminated mode, then byte 0 is terminator regardless frequency + } else { + u16 byteHisto[256]; + memset(byteHisto, 0, sizeof(byteHisto)); + for(size_t i=0; iterminator = 256; + while(i-- > 0) { + if (byteHisto[i] > minSize) continue; + st->terminator = i; + minSize = byteHisto[i]; + } + } + assert(st->terminator != 256); + + // a random number between 0 and 128 + auto rnd128 = [&](size_t i) { return 1 + (FSST_HASH((i+1UL)*sampleFrac)&127); }; + + // compress sample, and compute (pair-)frequencies + auto compressCount = [&](SymbolTable *st, Counters &counters) { // returns gain + int gain = 0; + + for(size_t i=0; i sampleFrac) continue; + } + if (cur < end) { + u8* start = cur; + u16 code2 = 255, code1 = st->findLongestSymbol(cur, end); + cur += st->symbols[code1].length(); + gain += (int) (st->symbols[code1].length()-(1+isEscapeCode(code1))); + while (true) { + // count single symbol (i.e. 
an option is not extending it) + counters.count1Inc(code1); - // a random number between 0 and 128 - auto rnd128 = [&](size_t i) { return 1 + (FSST_HASH((i+1UL)*sampleFrac)&127); }; + // as an alternative, consider just using the next byte.. + if (st->symbols[code1].length() != 1) // .. but do not count single byte symbols doubly + counters.count1Inc(*start); - // compress sample, and compute (pair-)frequencies - auto compressCount = [&](SymbolTable *st, Counters &counters) { // returns gain - int gain = 0; + if (cur==end) { + break; + } - for(size_t i=0; ihashTabSize-1); + Symbol s = st->hashTab[idx]; + code2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK; + word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); + if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) { + code2 = s.code(); + cur += s.length(); + } else if (code2 >= FSST_CODE_BASE) { + cur += 2; + } else { + code2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK; + cur += 1; + } + } else { + code2 = st->findLongestSymbol(cur, end); + cur += st->symbols[code2].length(); + } - if (sampleFrac < 128) { - // in earlier rounds (sampleFrac < 128) we skip data in the sample (reduces overall work ~2x) - if (rnd128(i) > sampleFrac) continue; - } - if (cur < end) { - u16 pos2 = 255, pos1 = st->findLongestSymbol(cur, end); - cur += st->symbols[pos1].length(); - gain += (int) (st->symbols[pos1].length()-(1+isEscapeCode(pos1))); - while (true) { - u8* old = cur; - counters.count1Inc(pos1); - // count single symbol (i.e. an option is not extending it) - if (cur>=end) - break; - if (st->symbols[pos1].length() != 1) - counters.count1Inc(*cur); - if (curhashTabSize-1); - Symbol s = st->hashTab[idx]; - pos2 = st->shortCodes[word & 0xFFFF] & FSST_CODE_MASK; - word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) & (s.val.num == word)) { - pos2 = s.code(); - cur += s.length(); - } else if (pos2 >= FSST_CODE_BASE) { - cur += 2; - } else { - pos2 = st->byteCodes[word & 0xFF] & FSST_CODE_MASK; - cur += 1; - } - } else { - assert(curfindLongestSymbol(cur, end); - cur += st->symbols[pos2].length(); - } - - // compute compressed output size - gain += ((int) (cur-old))-(1+isEscapeCode(pos2)); - - // now count the subsequent two symbols we encode as an extension possibility - if (sampleFrac < 128) { // no need to count pairs in final round - counters.count2Inc(pos1, pos2); - if ((cur-old) > 1) // do not count escaped bytes doubly - counters.count2Inc(pos1, *old); - } - pos1 = pos2; - } - } - } - return gain; - }; + // compute compressed output size + gain += ((int) (cur-start))-(1+isEscapeCode(code2)); - auto makeTable = [&](SymbolTable *st, Counters &counters) { - // hashmap of c (needed because we can generate duplicate candidates) - unordered_set cands; - - // artificially make terminater the most frequent symbol so it gets included - u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator; - counters.count1Set(terminator,65535); - - auto addOrInc = [&](unordered_set &cands, Symbol s, u64 count) { - if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!! 
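One detail of the hunks above worth spelling out is the sampling schedule: in early training rounds, compressCount only visits a deterministic pseudo-random fraction of the sample (roughly sampleFrac/128 of it), and only the final round with sampleFrac == 128 sees every string. Below is a hedged, stand-alone sketch of that test; `toy_hash` is a stand-in for the real `FSST_HASH` and uses a different constant.

```cpp
// Sketch of the fractional sampling used by compressCount: each string index
// maps to a pseudo-random value in 1..128, and the string participates in the
// current round iff that value is <= sampleFrac.
#include <cstddef>
#include <cstdint>

static inline uint64_t toy_hash(uint64_t w) {
	w *= 2971215073ULL; // any decent multiplicative constant works for the sketch
	return w ^ (w >> 15);
}

bool InSample(size_t i, size_t sampleFrac) {
	size_t rnd128 = 1 + (toy_hash((i + 1UL) * sampleFrac) & 127); // 1..128
	return rnd128 <= sampleFrac; // sampleFrac == 128 keeps every string
}
```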
- QSymbol q;
- q.symbol = s;
- q.gain = count * s.length();
- auto it = cands.find(q);
- if (it != cands.end()) {
- q.gain += (*it).gain;
- cands.erase(*it);
- }
- cands.insert(q);
- };
-
- // add candidate symbols based on counted frequency
- for (u32 pos1=0; pos1<st->nSymbols; pos1++) {
- u32 cnt1 = counters.count1GetNext(pos1); // may advance pos1!!
- if (!cnt1) continue;
-
- // heuristic: promoting single-byte symbols (*8) helps reduce exception rates and increases [de]compression speed
- Symbol s1 = st->symbols[pos1];
- addOrInc(cands, s1, ((s1.length()==1)?8LL:1LL)*cnt1);
-
- if (sampleFrac >= 128 || // last round we do not create new (combined) symbols
- s1.length() == Symbol::maxLength || // symbol cannot be extended
- s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the terminator byte
- continue;
- }
- for (u32 pos2=0; pos2<st->nSymbols; pos2++) {
- u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!!
- if (!cnt2) continue;
-
- // create a new symbol
- Symbol s2 = st->symbols[pos2];
- Symbol s3 = concat(s1, s2);
- if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte
- addOrInc(cands, s3, cnt2);
- }
- }
-
- // insert candidates into priority queue (by gain)
- auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); };
- priority_queue<QSymbol,vector<QSymbol>,decltype(cmpGn)> pq(cmpGn);
- for (auto& q : cands)
- pq.push(q);
-
- // Create new symbol map using best candidates
- st->clear();
- while (st->nSymbols < 255 && !pq.empty()) {
- QSymbol q = pq.top();
- pq.pop();
- st->add(q.symbol);
- }
- };
-
- u8 bestCounters[512*sizeof(u16)];
+ // compute compressed output size
+ gain += ((int) (cur-start))-(1+isEscapeCode(code2));
+
+ // now count the subsequent two symbols we encode as an extension possibility
+ if (sampleFrac < 128) { // no need to count pairs in final round
+ // consider the symbol that is the concatenation of the two last symbols
+ counters.count2Inc(code1, code2);
+
+ // as an alternative, consider just extending with the next byte..
+ if ((cur-start) > 1) // ..but do not count single byte extensions doubly
+ counters.count2Inc(code1, *start);
+ }
+ code1 = code2;
+ }
+ }
+ }
+ return gain;
+ };
+
+ auto makeTable = [&](SymbolTable *st, Counters &counters) {
+ // hashmap of c (needed because we can generate duplicate candidates)
+ unordered_set<QSymbol> cands;
+
+ // artificially make terminator the most frequent symbol so it gets included
+ u16 terminator = st->nSymbols?FSST_CODE_BASE:st->terminator;
+ counters.count1Set(terminator,65535);
+
+ auto addOrInc = [&](unordered_set<QSymbol> &cands, Symbol s, u64 count) {
+ if (count < (5*sampleFrac)/128) return; // improves both compression speed (less candidates), but also quality!!
+ QSymbol q;
+ q.symbol = s;
+ q.gain = count * s.length();
+ auto it = cands.find(q);
+ if (it != cands.end()) {
+ q.gain += (*it).gain;
+ cands.erase(*it);
+ }
+ cands.insert(q);
+ };
+
+ // add candidate symbols based on counted frequency
+ for (u32 pos1=0; pos1<st->nSymbols; pos1++) {
+ u32 cnt1 = counters.count1GetNext(pos1); // may advance pos1!!
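The addOrInc helper above (old and new version alike) scores a candidate symbol by count * length and merges duplicates by summing their gains. The same bookkeeping in a compact, illustrative form, using a plain map instead of the real QSymbol hash set; the hard-coded cutoff of 5 is what `(5*sampleFrac)/128` evaluates to in the final round.

```cpp
// Minimal sketch of the addOrInc bookkeeping: candidates are scored by
// occurrence count times symbol length, and candidates generated more than
// once are merged by accumulating their gain.
#include <cstdint>
#include <string>
#include <unordered_map>

void AddOrInc(std::unordered_map<std::string, uint64_t> &cands, const std::string &sym, uint64_t count) {
	if (count < 5) return;            // drop rare candidates early, as the real code does
	cands[sym] += count * sym.size(); // merge duplicates by summing gain
}
```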
+ if (!cnt1) continue; - // insert candidates into priority queue (by gain) - auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); }; - priority_queue,decltype(cmpGn)> pq(cmpGn); - for (auto& q : cands) - pq.push(q); - - // Create new symbol map using best candidates - st->clear(); - while (st->nSymbols < 255 && !pq.empty()) { - QSymbol q = pq.top(); - pq.pop(); - st->add(q.symbol); - } - }; + // heuristic: promoting single-byte symbols (*8) helps reduce exception rates and increases [de]compression speed + Symbol s1 = st->symbols[pos1]; + addOrInc(cands, s1, ((s1.length()==1)?8LL:1LL)*cnt1); - u8 bestCounters[512*sizeof(u16)]; + if (sampleFrac >= 128 || // last round we do not create new (combined) symbols + s1.length() == Symbol::maxLength || // symbol cannot be extended + s1.val.str[0] == st->terminator) { // multi-byte symbols cannot contain the terminator byte + continue; + } + for (u32 pos2=0; pos2nSymbols; pos2++) { + u32 cnt2 = counters.count2GetNext(pos1, pos2); // may advance pos2!! + if (!cnt2) continue; + + // create a new symbol + Symbol s2 = st->symbols[pos2]; + Symbol s3 = concat(s1, s2); + if (s2.val.str[0] != st->terminator) // multi-byte symbols cannot contain the terminator byte + addOrInc(cands, s3, cnt2); + } + } + + // insert candidates into priority queue (by gain) + auto cmpGn = [](const QSymbol& q1, const QSymbol& q2) { return (q1.gain < q2.gain) || (q1.gain == q2.gain && q1.symbol.val.num > q2.symbol.val.num); }; + priority_queue,decltype(cmpGn)> pq(cmpGn); + for (auto& q : cands) + pq.push(q); + + // Create new symbol map using best candidates + st->clear(); + while (st->nSymbols < 255 && !pq.empty()) { + QSymbol q = pq.top(); + pq.pop(); + st->add(q.symbol); + } + }; + + u8 bestCounters[512*sizeof(u16)]; #ifdef NONOPT_FSST - for(size_t frac : {127, 127, 127, 127, 127, 127, 127, 127, 127, 128}) { - sampleFrac = frac; + for(size_t frac : {127, 127, 127, 127, 127, 127, 127, 127, 127, 128}) { + sampleFrac = frac; #else - for(sampleFrac=8; true; sampleFrac += 30) { + for(sampleFrac=8; true; sampleFrac += 30) { #endif - memset(&counters, 0, sizeof(Counters)); - long gain = compressCount(st, counters); - if (gain >= bestGain) { // a new best solution! - counters.backup1(bestCounters); - *bestTable = *st; bestGain = gain; - } - if (sampleFrac >= 128) break; // we do 5 rounds (sampleFrac=8,38,68,98,128) - makeTable(st, counters); - } - delete st; - counters.restore1(bestCounters); - makeTable(bestTable, counters); - bestTable->finalize(zeroTerminated); // renumber codes for more efficient compression - return bestTable; + memset(&counters, 0, sizeof(Counters)); + long gain = compressCount(st, counters); + if (gain >= bestGain) { // a new best solution! 
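makeTable then keeps only the best-scoring candidates: at most 255 symbols survive into the next round's table, chosen through a max-priority-queue on gain. Here is a self-contained sketch of that selection step; `ToyCandidate` is illustrative rather than the real QSymbol, and the real comparator additionally breaks gain ties on the symbol bytes to keep results deterministic.

```cpp
// Sketch of the candidate selection in makeTable: order by gain, keep at
// most 255 symbols (codes 0..254; the remaining codes are reserved).
#include <cstdint>
#include <queue>
#include <string>
#include <vector>

struct ToyCandidate {
	std::string bytes;
	uint64_t gain; // occurrence count * symbol length
};

std::vector<ToyCandidate> PickBestSymbols(std::vector<ToyCandidate> cands) {
	auto cmp = [](const ToyCandidate &a, const ToyCandidate &b) { return a.gain < b.gain; };
	std::priority_queue<ToyCandidate, std::vector<ToyCandidate>, decltype(cmp)> pq(cmp, std::move(cands));
	std::vector<ToyCandidate> table;
	while (table.size() < 255 && !pq.empty()) { // same 255-symbol cap as above
		table.push_back(pq.top());
		pq.pop();
	}
	return table;
}
```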
+ counters.backup1(bestCounters); + *bestTable = *st; bestGain = gain; + } + if (sampleFrac >= 128) break; // we do 5 rounds (sampleFrac=8,38,68,98,128) + makeTable(st, counters); + } + delete st; + counters.restore1(bestCounters); + makeTable(bestTable, counters); + bestTable->finalize(zeroTerminated); // renumber codes for more efficient compression + return bestTable; } static inline size_t compressSIMD(SymbolTable &symbolTable, u8* symbolBase, size_t nlines, size_t len[], u8* line[], size_t size, u8* dst, size_t lenOut[], u8* strOut[], int unroll) { - size_t curLine = 0, inOff = 0, outOff = 0, batchPos = 0, empty = 0, budget = size; - u8 *lim = dst + size, *codeBase = symbolBase + (1<<18); // 512KB temp space for compressing 512 strings - SIMDjob input[512]; // combined offsets of input strings (cur,end), and string #id (pos) and output (dst) pointer - SIMDjob output[512]; // output are (pos:9,dst:19) end pointers (compute compressed length from this) - size_t jobLine[512]; // for which line in the input sequence was this job (needed because we may split a line into multiple jobs) - - while (curLine < nlines && outOff <= (1<<19)) { - size_t prevLine = curLine, chunk, curOff = 0; - - // bail out if the output buffer cannot hold the compressed next string fully - if (((len[curLine]-curOff)*2 + 7) > budget) break; // see below for the +7 - else budget -= (len[curLine]-curOff)*2; - - strOut[curLine] = (u8*) 0; - lenOut[curLine] = 0; + size_t curLine = 0, inOff = 0, outOff = 0, batchPos = 0, empty = 0, budget = size; + u8 *lim = dst + size, *codeBase = symbolBase + (1<<18); // 512KB temp space for compressing 512 strings + SIMDjob input[512]; // combined offsets of input strings (cur,end), and string #id (pos) and output (dst) pointer + SIMDjob output[512]; // output are (pos:9,dst:19) end pointers (compute compressed length from this) + size_t jobLine[512]; // for which line in the input sequence was this job (needed because we may split a line into multiple jobs) - do { - do { - chunk = len[curLine] - curOff; - if (chunk > 511) { - chunk = 511; // large strings need to be chopped up into segments of 511 bytes - } - // create a job in this batch - SIMDjob job; - job.cur = inOff; - job.end = job.cur + chunk; - job.pos = batchPos; - job.out = outOff; - - // worst case estimate for compressed size (+7 is for the scatter that writes extra 7 zeros) - outOff += 7 + 2*(size_t)(job.end - job.cur); // note, total size needed is 512*(511*2+7) bytes. - if (outOff > (1<<19)) break; // simdbuf may get full, stop before this chunk - - // register job in this batch - input[batchPos] = job; - jobLine[batchPos] = curLine; - - if (chunk == 0) { - empty++; // detect empty chunks -- SIMD code cannot handle empty strings, so they need to be filtered out - } else { - // copy string chunk into temp buffer - memcpy(symbolBase + inOff, line[curLine] + curOff, chunk); - inOff += chunk; - curOff += chunk; - symbolBase[inOff++] = (u8) symbolTable.terminator; // write an extra char at the end that will not be encoded - } - if (++batchPos == 512) break; - } while(curOff < len[curLine]); - - if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines)) { // cannot accumulate more? 
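In compressSIMD (both versions in this hunk), work is handed to the AVX-512 kernel as fixed-size jobs: each input string is chopped into chunks of at most 511 bytes so offsets and lengths fit the SIMDjob bit fields. Below is a reduced sketch of just the chopping, without the output-budget and batch-full checks of the real loop; `ToyJob` uses plain fields where the real struct packs bit fields.

```cpp
// Sketch of the 511-byte job chopping in compressSIMD. Empty strings still
// produce one (empty) job, which the real code counts separately because
// the SIMD kernel cannot process them.
#include <cstddef>
#include <vector>

struct ToyJob {
	size_t cur, end, pos; // input offset range and batch position
};

std::vector<ToyJob> ChopIntoJobs(const std::vector<size_t> &len) {
	std::vector<ToyJob> jobs;
	size_t inOff = 0;
	for (size_t line = 0; line < len.size(); line++) {
		size_t curOff = 0;
		do {
			size_t chunk = len[line] - curOff;
			if (chunk > 511) {
				chunk = 511; // large strings become multiple 511-byte segments
			}
			jobs.push_back(ToyJob {inOff, inOff + chunk, jobs.size()});
			inOff += chunk;
			curOff += chunk;
		} while (curOff < len[line]);
	}
	return jobs;
}
```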
- if (batchPos-empty >= 32) { // if we have enough work, fire off duckdb_fsst_compressAVX512 (32 is due to max 4x8 unrolling) - // radix-sort jobs on length (longest string first) - // -- this provides best load balancing and allows to skip empty jobs at the end - u16 sortpos[513]; - memset(sortpos, 0, sizeof(sortpos)); - - // calculate length histo - for(size_t i=0; i> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) && s.val.num == word) { - *out++ = (u8) s.code(); cur += s.length(); - } else { - // could be a 2-byte or 1-byte code, or miss - // handle everything with predication - *out = (u8) code; - out += 1+((code&FSST_CODE_BASE)>>8); - cur += (code>>FSST_LEN_BITS); - } - } - job.out = out - codeBase; - } - // postprocess job info - job.cur = 0; - job.end = job.out - input[job.pos].out; // misuse .end field as compressed size - job.out = input[job.pos].out; // reset offset to start of encoded string - input[job.pos] = job; - } - - // copy out the result data - for(size_t i=0; i budget) break; // see below for the +7 + else budget -= (len[curLine]-curOff)*2; + + strOut[curLine] = (u8*) 0; + lenOut[curLine] = 0; + + do { + do { + chunk = len[curLine] - curOff; + if (chunk > 511) { + chunk = 511; // large strings need to be chopped up into segments of 511 bytes + } + // create a job in this batch + SIMDjob job; + job.cur = inOff; + job.end = job.cur + chunk; + job.pos = batchPos; + job.out = outOff; + + // worst case estimate for compressed size (+7 is for the scatter that writes extra 7 zeros) + outOff += 7 + 2*(size_t)(job.end - job.cur); // note, total size needed is 512*(511*2+7) bytes. + if (outOff > (1<<19)) break; // simdbuf may get full, stop before this chunk + + // register job in this batch + input[batchPos] = job; + jobLine[batchPos] = curLine; + + if (chunk == 0) { + empty++; // detect empty chunks -- SIMD code cannot handle empty strings, so they need to be filtered out + } else { + // copy string chunk into temp buffer + memcpy(symbolBase + inOff, line[curLine] + curOff, chunk); + inOff += chunk; + curOff += chunk; + symbolBase[inOff++] = (u8) symbolTable.terminator; // write an extra char at the end that will not be encoded + } + if (++batchPos == 512) break; + } while(curOff < len[curLine]); + + if ((batchPos == 512) || (outOff > (1<<19)) || (++curLine >= nlines)) { // cannot accumulate more? 
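+				// flush the accumulated batch: with at least 32 non-empty jobs the
+				// AVX512 kernel pays off (32 comes from the max 4x8 unrolling); jobs
+				// are radix-sorted longest-first for load balancing, and whatever the
+				// SIMD kernel leaves unfinished is compressed by the scalar loop below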
+ if (batchPos-empty >= 32) { // if we have enough work, fire off fsst_compressAVX512 (32 is due to max 4x8 unrolling) + // radix-sort jobs on length (longest string first) + // -- this provides best load balancing and allows to skip empty jobs at the end + u16 sortpos[513]; + memset(sortpos, 0, sizeof(sortpos)); + + // calculate length histo + for(size_t i=0; i> (u8) s.icl); + if ((s.icl < FSST_ICL_FREE) && s.val.num == word) { + *out++ = (u8) s.code(); cur += s.length(); + } else { + // could be a 2-byte or 1-byte code, or miss + // handle everything with predication + *out = (u8) code; + out += 1+((code&FSST_CODE_BASE)>>8); + cur += (code>>FSST_LEN_BITS); + } + } + job.out = out - codeBase; + } + // postprocess job info + job.cur = 0; + job.end = job.out - input[job.pos].out; // misuse .end field as compressed size + job.out = input[job.pos].out; // reset offset to start of encoded string + input[job.pos] = job; + } + + // copy out the result data + for(size_t i=0; i> (u8) s.icl); - if ((s.icl < FSST_ICL_FREE) && s.val.num == word) { - *out++ = (u8) s.code(); cur += s.length(); - } else if (avoidBranch) { - // could be a 2-byte or 1-byte code, or miss - // handle everything with predication - *out = (u8) code; - out += 1+((code&FSST_CODE_BASE)>>8); - cur += (code>>FSST_LEN_BITS); - } else if ((u8) code < byteLim) { - // 2 byte code after checking there is no longer pattern - *out++ = (u8) code; cur += 2; - } else { - // 1 byte code or miss. - *out = (u8) code; - out += 1+((code&FSST_CODE_BASE)>>8); // predicated - tested with a branch, that was always worse - cur++; - } - } - } - }; + u8 *cur = NULL, *end = NULL, *lim = out + size; + size_t curLine, suffixLim = symbolTable.suffixLim; + u8 byteLim = symbolTable.nSymbols + symbolTable.zeroTerminated - symbolTable.lenHisto[0]; + + u8 buf[512+7]; /* +7 sentinel is to avoid 8-byte unaligned-loads going beyond 511 out-of-bounds */ + memset(buf+511, 0, 8); /* and initialize the sentinal bytes */ + + // three variants are possible. dead code falls away since the bool arguments are constants + auto compressVariant = [&](bool noSuffixOpt, bool avoidBranch) { + while (cur < end) { + u64 word = fsst_unaligned_load(cur); + size_t code = symbolTable.shortCodes[word & 0xFFFF]; + if (noSuffixOpt && ((u8) code) < suffixLim) { + // 2 byte code without having to worry about longer matches + *out++ = (u8) code; cur += 2; + } else { + size_t pos = word & 0xFFFFFF; + size_t idx = FSST_HASH(pos)&(symbolTable.hashTabSize-1); + Symbol s = symbolTable.hashTab[idx]; + out[1] = (u8) word; // speculatively write out escaped byte + word &= (0xFFFFFFFFFFFFFFFF >> (u8) s.icl); + if ((s.icl < FSST_ICL_FREE) && s.val.num == word) { + *out++ = (u8) s.code(); cur += s.length(); + } else if (avoidBranch) { + // could be a 2-byte or 1-byte code, or miss + // handle everything with predication + *out = (u8) code; + out += 1+((code&FSST_CODE_BASE)>>8); + cur += (code>>FSST_LEN_BITS); + } else if ((u8) code < byteLim) { + // 2 byte code after checking there is no longer pattern + *out++ = (u8) code; cur += 2; + } else { + // 1 byte code or miss. 
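+					// out[1] already holds the escaped input byte (written speculatively
+					// above); code has FSST_CODE_BASE set only when the byte must be
+					// escaped, so the predicated update below emits 2 output bytes for
+					// an escape and 1 for a code hit, while cur always advances by 1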
+ *out = (u8) code; + out += 1+((code&FSST_CODE_BASE)>>8); // predicated - tested with a branch, that was always worse + cur++; + } + } + } + }; - for(curLine=0; curLine 511) { - chunk = 511; // we need to compress in chunks of 511 in order to be byte-compatible with simd-compressed FSST - skipCopy = false; // need to put terminator, so no in place mem usage possible - } - if ((2*chunk+7) > (size_t) (lim-out)) { - return curLine; // out of memory - } - if (!skipCopy) { // only in case of short zero-terminated strings, we can avoid copying - memcpy(buf, cur, chunk); - cur = buf; - buf[chunk] = (u8) symbolTable.terminator; - } - end = cur + chunk; - // based on symboltable stats, choose a variant that is nice to the branch predictor - if (noSuffixOpt) { - compressVariant(true,false); - } else if (avoidBranch) { - compressVariant(false,true); - } else { - compressVariant(false, false); - } - } while((curOff += chunk) < lenIn[curLine]); - lenOut[curLine] = (size_t) (out - strOut[curLine]); - } - return curLine; + for(curLine=0; curLine 511) { + chunk = 511; // we need to compress in chunks of 511 in order to be byte-compatible with simd-compressed FSST + } + if ((2*chunk+7) > (size_t) (lim-out)) { + return curLine; // out of memory + } + // copy the string to the 511-byte buffer + memcpy(buf, cur, chunk); + buf[chunk] = (u8) symbolTable.terminator; + cur = buf; + end = cur + chunk; + + // based on symboltable stats, choose a variant that is nice to the branch predictor + if (noSuffixOpt) { + compressVariant(true,false); + } else if (avoidBranch) { + compressVariant(false,true); + } else { + compressVariant(false, false); + } + } while((curOff += chunk) < lenIn[curLine]); + lenOut[curLine] = (size_t) (out - strOut[curLine]); + } + return curLine; } #define FSST_SAMPLELINE ((size_t) 512) // quickly select a uniformly random set of lines such that we have between [FSST_SAMPLETARGET,FSST_SAMPLEMAXSZ) string bytes vector makeSample(u8* sampleBuf, u8* strIn[], size_t **lenRef, size_t nlines) { - size_t totSize = 0, *lenIn = *lenRef; - vector sample; + size_t totSize = 0, *lenIn = *lenRef; + vector sample; - for(size_t i=0; i sample = makeSample(sampleBuf, strIn, &sampleLen, n?n:1); // careful handling of input to get a right-size and representative sample - Encoder *encoder = new Encoder(); - encoder->symbolTable = shared_ptr(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated)); - if (sampleLen != lenIn) delete[] sampleLen; - delete[] sampleBuf; - return (duckdb_fsst_encoder_t*) encoder; + u8* sampleBuf = new u8[FSST_SAMPLEMAXSZ]; + size_t *sampleLen = lenIn; + vector sample = makeSample(sampleBuf, strIn, &sampleLen, n?n:1); // careful handling of input to get a right-size and representative sample + Encoder *encoder = new Encoder(); + encoder->symbolTable = shared_ptr(buildSymbolTable(encoder->counters, sample, sampleLen, zeroTerminated)); + if (sampleLen != lenIn) delete[] sampleLen; + delete[] sampleBuf; + return (duckdb_fsst_encoder_t*) encoder; } /* create another encoder instance, necessary to do multi-threaded encoding using the same symbol table */ extern "C" duckdb_fsst_encoder_t* duckdb_fsst_duplicate(duckdb_fsst_encoder_t *encoder) { - Encoder *e = new Encoder(); - e->symbolTable = ((Encoder*)encoder)->symbolTable; // it is a shared_ptr - return (duckdb_fsst_encoder_t*) e; + Encoder *e = new Encoder(); + e->symbolTable = ((Encoder*)encoder)->symbolTable; // it is a shared_ptr + return (duckdb_fsst_encoder_t*) e; } -// export a symbol table in compact format. 
+// export a symbol table in compact format.
extern "C" u32 duckdb_fsst_export(duckdb_fsst_encoder_t *encoder, u8 *buf) {
-	Encoder *e = (Encoder*) encoder;
-	// In ->version there is a version number, but we also hide suffixLim/terminator/nSymbols there.
-	// This is sufficient in principle to *reconstruct* a duckdb_fsst_encoder_t from a duckdb_fsst_decoder_t
-	// (such functionality could be useful to append compressed data to an existing block).
-	//
-	// However, the hash function in the encoder hash table is endian-sensitive, and given its
-	// 'lossy perfect' hashing scheme is *unable* to contain other-endian-produced symbol tables.
-	// Doing an endian-conversion during hashing will be slow and self-defeating.
-	//
-	// Overall, we could support reconstructing an encoder for incremental compression, but
-	// should enforce equal-endianness. Bit of a bummer. Not going there now.
-	//
-	// The version field is now there just for future-proofing, but not used yet
-
-	// version allows keeping track of fsst versions, track endianness, and encoder reconstruction
-	u64 version = (FSST_VERSION << 32) | // version is 24 bits, most significant byte is 0
-	(((u64) e->symbolTable->suffixLim) << 24) |
-	(((u64) e->symbolTable->terminator) << 16) |
-	(((u64) e->symbolTable->nSymbols) << 8) |
-	FSST_ENDIAN_MARKER; // least significant byte is nonzero
+	Encoder *e = (Encoder*) encoder;
+	// In ->version there is a version number, but we also hide suffixLim/terminator/nSymbols there.
+	// This is sufficient in principle to *reconstruct* a duckdb_fsst_encoder_t from a duckdb_fsst_decoder_t
+	// (such functionality could be useful to append compressed data to an existing block).
+	//
+	// However, the hash function in the encoder hash table is endian-sensitive, and given its
+	// 'lossy perfect' hashing scheme is *unable* to contain other-endian-produced symbol tables.
+	// Doing an endian-conversion during hashing will be slow and self-defeating.
+	//
+	// Overall, we could support reconstructing an encoder for incremental compression, but
+	// should enforce equal-endianness. Bit of a bummer. Not going there now.
+	//
+	// The version field is now there just for future-proofing, but not used yet
+
+	// version allows keeping track of fsst versions, track endianness, and encoder reconstruction
+	u64 version = (FSST_VERSION << 32) | // version is 24 bits, most significant byte is 0
+	              (((u64) e->symbolTable->suffixLim) << 24) |
+	              (((u64) e->symbolTable->terminator) << 16) |
+	              (((u64) e->symbolTable->nSymbols) << 8) |
+	              FSST_ENDIAN_MARKER; // least significant byte is nonzero

-	/* do not assume unaligned reads here */
-	memcpy(buf, &version, 8);
-	buf[8] = e->symbolTable->zeroTerminated;
-	for(u32 i=0; i<8; i++)
-		buf[9+i] = (u8) e->symbolTable->lenHisto[i];
-	u32 pos = 17;
+	/* do not assume unaligned reads here */
+	memcpy(buf, &version, 8);
+	buf[8] = e->symbolTable->zeroTerminated;
+	for(u32 i=0; i<8; i++)
+		buf[9+i] = (u8) e->symbolTable->lenHisto[i];
+	u32 pos = 17;

-	// emit only the used bytes of the symbols
-	for(u32 i = e->symbolTable->zeroTerminated; i < e->symbolTable->nSymbols; i++)
-		for(u32 j = 0; j < e->symbolTable->symbols[i].length(); j++)
-			buf[pos++] = e->symbolTable->symbols[i].val.str[j]; // serialize used symbol bytes
+	// emit only the used bytes of the symbols
+	for(u32 i = e->symbolTable->zeroTerminated; i < e->symbolTable->nSymbols; i++)
+		for(u32 j = 0; j < e->symbolTable->symbols[i].length(); j++)
+			buf[pos++] = e->symbolTable->symbols[i].val.str[j]; // serialize used symbol bytes

-	return pos; // length of what was serialized
+	return pos; // length of what was serialized
}

#define FSST_CORRUPT 32774747032022883 /* 7-byte number in little endian containing "corrupt" */

extern "C" u32 duckdb_fsst_import(duckdb_fsst_decoder_t *decoder, u8 *buf) {
-	u64 version = 0;
-	u32 code, pos = 17;
-	u8 lenHisto[8];
-
-	// version field (first 8 bytes) is now there just for future-proofing, unused still (skipped)
-	memcpy(&version, buf, 8);
-	if ((version>>32) != FSST_VERSION) return 0;
-	decoder->zeroTerminated = buf[8]&1;
-	memcpy(lenHisto, buf+9, 8);
-
-	// in case of zero-terminated, first symbol is "" (zero always, may be overwritten)
-	decoder->len[0] = 1;
-	decoder->symbol[0] = 0;
-
-	// we use lenHisto[0] as 1-byte symbol run length (at the end)
-	code = decoder->zeroTerminated;
-	if (decoder->zeroTerminated) lenHisto[0]--; // if zeroTerminated, then symbol "" aka 1-byte code=0, is not stored at the end
-
-	// now get all symbols from the buffer
-	for(u32 l=1; l<=8; l++) { /* l = 1,2,3,4,5,6,7,8 */
-		for(u32 i=0; i < lenHisto[(l&7) /* 1,2,3,4,5,6,7,0 */]; i++, code++) {
-			decoder->len[code] = (l&7)+1; /* len = 2,3,4,5,6,7,8,1 */
-			decoder->symbol[code] = 0;
-			for(u32 j=0; j<decoder->len[code]; j++)
-				((u8*) &decoder->symbol[code])[j] = buf[pos++]; // note this enforces 'little endian' symbols
-		}
-	}
-	if (decoder->zeroTerminated) lenHisto[0]++;
+	u64 version = 0;
+	u32 code, pos = 17;
+	u8 lenHisto[8];

-	// fill unused symbols with text "corrupt". Gives a chance to detect corrupted code sequences (if there are unused symbols).
-	while(code<255) {
-		decoder->symbol[code] = FSST_CORRUPT;
-		decoder->len[code++] = 8;
-	}
-	return pos;
+	// version field (first 8 bytes) is now there just for future-proofing, unused still (skipped)
+	memcpy(&version, buf, 8);
+	if ((version>>32) != FSST_VERSION) return 0;
+	decoder->zeroTerminated = buf[8]&1;
+	memcpy(lenHisto, buf+9, 8);
+
+	// in case of zero-terminated, first symbol is "" (zero always, may be overwritten)
+	decoder->len[0] = 1;
+	decoder->symbol[0] = 0;
+
+	// we use lenHisto[0] as 1-byte symbol run length (at the end)
+	code = decoder->zeroTerminated;
+	if (decoder->zeroTerminated) lenHisto[0]--; // if zeroTerminated, then symbol "" aka 1-byte code=0, is not stored at the end
+
+	// now get all symbols from the buffer
+	for(u32 l=1; l<=8; l++) { /* l = 1,2,3,4,5,6,7,8 */
+		for(u32 i=0; i < lenHisto[(l&7) /* 1,2,3,4,5,6,7,0 */]; i++, code++) {
+			decoder->len[code] = (l&7)+1; /* len = 2,3,4,5,6,7,8,1 */
+			decoder->symbol[code] = 0;
+			for(u32 j=0; j<decoder->len[code]; j++)
+				((u8*) &decoder->symbol[code])[j] = buf[pos++]; // note this enforces 'little endian' symbols
+		}
+	}
+	if (decoder->zeroTerminated) lenHisto[0]++;
+
+	// fill unused symbols with text "corrupt". Gives a chance to detect corrupted code sequences (if there are unused symbols).
+	while(code<255) {
+		decoder->symbol[code] = FSST_CORRUPT;
+		decoder->len[code++] = 8;
+	}
+	return pos;
}

// runtime check for simd
inline size_t _compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
#ifndef NONOPT_FSST
-	if (simd && duckdb_fsst_hasAVX512())
-		return compressSIMD(*e->symbolTable, e->simdbuf, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
+	if (simd && duckdb_fsst_hasAVX512())
+		return compressSIMD(*e->symbolTable, e->simdbuf, nlines, lenIn, strIn, size, output, lenOut, strOut, simd);
#endif
-	(void) simd;
-	return compressBulk(*e->symbolTable, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch);
+	(void) simd;
+	return compressBulk(*e->symbolTable, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch);
}
size_t compressImpl(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd) {
-	return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
+	return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
}

-// adaptive choosing of scalar compression method based on symbol length histogram
+// adaptive choosing of scalar compression method based on symbol length histogram
inline size_t _compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) {
-	bool avoidBranch = false, noSuffixOpt = false;
-	if (100*e->symbolTable->lenHisto[1] > 65*e->symbolTable->nSymbols && 100*e->symbolTable->suffixLim > 95*e->symbolTable->lenHisto[1]) {
-		noSuffixOpt = true;
-	} else if ((e->symbolTable->lenHisto[0] > 24 && e->symbolTable->lenHisto[0] < 92) &&
-	           (e->symbolTable->lenHisto[0] < 43 || e->symbolTable->lenHisto[6] + e->symbolTable->lenHisto[7] < 29) &&
-	           (e->symbolTable->lenHisto[0] < 72 || e->symbolTable->lenHisto[2] < 72)) {
-		avoidBranch = true;
-	}
-	return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd);
+	bool avoidBranch =
false, noSuffixOpt = false; + if (100*e->symbolTable->lenHisto[1] > 65*e->symbolTable->nSymbols && 100*e->symbolTable->suffixLim > 95*e->symbolTable->lenHisto[1]) { + noSuffixOpt = true; + } else if ((e->symbolTable->lenHisto[0] > 24 && e->symbolTable->lenHisto[0] < 92) && + (e->symbolTable->lenHisto[0] < 43 || e->symbolTable->lenHisto[6] + e->symbolTable->lenHisto[7] < 29) && + (e->symbolTable->lenHisto[0] < 72 || e->symbolTable->lenHisto[2] < 72)) { + avoidBranch = true; + } + return _compressImpl(e, nlines, lenIn, strIn, size, output, lenOut, strOut, noSuffixOpt, avoidBranch, simd); } size_t compressAuto(Encoder *e, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[], int simd) { - return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd); + return _compressAuto(e, nlines, lenIn, strIn, size, output, lenOut, strOut, simd); } // the main compression function (everything automatic) extern "C" size_t duckdb_fsst_compress(duckdb_fsst_encoder_t *encoder, size_t nlines, size_t lenIn[], u8 *strIn[], size_t size, u8 *output, size_t *lenOut, u8 *strOut[]) { - // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB) - size_t totLen = accumulate(lenIn, lenIn+nlines, 0); - int simd = totLen > nlines*12 && (nlines > 64 || totLen > (size_t) 1<<15); - return _compressAuto((Encoder*) encoder, nlines, lenIn, strIn, size, output, lenOut, strOut, 3*simd); + // to be faster than scalar, simd needs 64 lines or more of length >=12; or fewer lines, but big ones (totLen > 32KB) + size_t totLen = accumulate(lenIn, lenIn+nlines, 0); + int simd = totLen > nlines*12 && (nlines > 64 || totLen > (size_t) 1<<15); + return _compressAuto((Encoder*) encoder, nlines, lenIn, strIn, size, output, lenOut, strOut, 3*simd); } /* deallocate encoder */ extern "C" void duckdb_fsst_destroy(duckdb_fsst_encoder_t* encoder) { - Encoder *e = (Encoder*) encoder; - delete e; + Encoder *e = (Encoder*) encoder; + delete e; } /* very lazy implementation relying on export and import */ extern "C" duckdb_fsst_decoder_t duckdb_fsst_decoder(duckdb_fsst_encoder_t *encoder) { - u8 buf[sizeof(duckdb_fsst_decoder_t)]; - u32 cnt1 = duckdb_fsst_export(encoder, buf); - duckdb_fsst_decoder_t decoder; - u32 cnt2 = duckdb_fsst_import(&decoder, buf); - assert(cnt1 == cnt2); (void) cnt1; (void) cnt2; - return decoder; + u8 buf[sizeof(duckdb_fsst_decoder_t)]; + u32 cnt1 = duckdb_fsst_export(encoder, buf); + duckdb_fsst_decoder_t decoder; + u32 cnt2 = duckdb_fsst_import(&decoder, buf); + assert(cnt1 == cnt2); (void) cnt1; (void) cnt2; + return decoder; } - // LICENSE_CHANGE_END diff --git a/libduckdb-sys/duckdb/duckdb.h b/libduckdb-sys/duckdb/duckdb.h index 598c6076..7a564706 100644 --- a/libduckdb-sys/duckdb/duckdb.h +++ b/libduckdb-sys/duckdb/duckdb.h @@ -520,6 +520,8 @@ DUCKDB_API const char *duckdb_result_error(duckdb_result *result); /*! Fetches a data chunk from the duckdb_result. This function should be called repeatedly until the result is exhausted. +The result must be destroyed with `duckdb_destroy_data_chunk`. + This function supersedes all `duckdb_value` functions, as well as the `duckdb_column_data` and `duckdb_nullmask_data` functions. It results in significantly better performance, and should be preferred in newer code-bases. 
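To illustrate the contract documented in the duckdb.h hunk above, here is a minimal sketch against the 0.6.x C API; it assumes the companion functions `duckdb_result_chunk_count`, `duckdb_result_get_chunk`, and `duckdb_data_chunk_get_size` from the same header, and `SELECT 42` is just a placeholder query:

    #include "duckdb.h"
    #include <stdio.h>

    int main(void) {
        duckdb_database db;
        duckdb_connection con;
        duckdb_result res;

        if (duckdb_open(NULL, &db) != DuckDBSuccess) return 1;   /* in-memory database */
        if (duckdb_connect(db, &con) != DuckDBSuccess) return 1;
        if (duckdb_query(con, "SELECT 42", &res) != DuckDBSuccess) return 1;

        /* fetch chunk by chunk; every chunk must be destroyed by the caller */
        idx_t chunk_count = duckdb_result_chunk_count(res);
        for (idx_t i = 0; i < chunk_count; i++) {
            duckdb_data_chunk chunk = duckdb_result_get_chunk(res, i);
            printf("chunk %llu: %llu rows\n", (unsigned long long)i,
                   (unsigned long long)duckdb_data_chunk_get_size(chunk));
            duckdb_destroy_data_chunk(&chunk); /* the rule added in the doc above */
        }

        duckdb_destroy_result(&res);
        duckdb_disconnect(&con);
        duckdb_close(&db);
        return 0;
    }

The point is the pairing: every chunk obtained from the result is released with `duckdb_destroy_data_chunk` before the result itself is destroyed.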
diff --git a/libduckdb-sys/duckdb/duckdb.hpp b/libduckdb-sys/duckdb/duckdb.hpp index 83d95182..2cf38ab2 100644 --- a/libduckdb-sys/duckdb/duckdb.hpp +++ b/libduckdb-sys/duckdb/duckdb.hpp @@ -10,8 +10,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI #pragma once #define DUCKDB_AMALGAMATION 1 -#define DUCKDB_SOURCE_ID "2213f9c946" -#define DUCKDB_VERSION "v0.6.0" +#define DUCKDB_SOURCE_ID "919cad22e8" +#define DUCKDB_VERSION "v0.6.1" //===----------------------------------------------------------------------===// // DuckDB // @@ -676,124 +676,6 @@ class Value; class TypeCatalogEntry; class Vector; class ClientContext; -//! Type used to represent dates (days since 1970-01-01) -struct date_t { // NOLINT - int32_t days; - - date_t() = default; - explicit inline date_t(int32_t days_p) : days(days_p) {} - - // explicit conversion - explicit inline operator int32_t() const {return days;} - - // comparison operators - inline bool operator==(const date_t &rhs) const {return days == rhs.days;}; - inline bool operator!=(const date_t &rhs) const {return days != rhs.days;}; - inline bool operator<=(const date_t &rhs) const {return days <= rhs.days;}; - inline bool operator<(const date_t &rhs) const {return days < rhs.days;}; - inline bool operator>(const date_t &rhs) const {return days > rhs.days;}; - inline bool operator>=(const date_t &rhs) const {return days >= rhs.days;}; - - // arithmetic operators - inline date_t operator+(const int32_t &days) const {return date_t(this->days + days);}; - inline date_t operator-(const int32_t &days) const {return date_t(this->days - days);}; - - // in-place operators - inline date_t &operator+=(const int32_t &days) {this->days += days; return *this;}; - inline date_t &operator-=(const int32_t &days) {this->days -= days; return *this;}; - - // special values - static inline date_t infinity() {return date_t(std::numeric_limits::max()); } // NOLINT - static inline date_t ninfinity() {return date_t(-std::numeric_limits::max()); } // NOLINT - static inline date_t epoch() {return date_t(0); } // NOLINT -}; - -//! 
Type used to represent time (microseconds) -struct dtime_t { // NOLINT - int64_t micros; - - dtime_t() = default; - explicit inline dtime_t(int64_t micros_p) : micros(micros_p) {} - inline dtime_t& operator=(int64_t micros_p) {micros = micros_p; return *this;} - - // explicit conversion - explicit inline operator int64_t() const {return micros;} - explicit inline operator double() const {return micros;} - - // comparison operators - inline bool operator==(const dtime_t &rhs) const {return micros == rhs.micros;}; - inline bool operator!=(const dtime_t &rhs) const {return micros != rhs.micros;}; - inline bool operator<=(const dtime_t &rhs) const {return micros <= rhs.micros;}; - inline bool operator<(const dtime_t &rhs) const {return micros < rhs.micros;}; - inline bool operator>(const dtime_t &rhs) const {return micros > rhs.micros;}; - inline bool operator>=(const dtime_t &rhs) const {return micros >= rhs.micros;}; - - // arithmetic operators - inline dtime_t operator+(const int64_t µs) const {return dtime_t(this->micros + micros);}; - inline dtime_t operator+(const double µs) const {return dtime_t(this->micros + int64_t(micros));}; - inline dtime_t operator-(const int64_t µs) const {return dtime_t(this->micros - micros);}; - inline dtime_t operator*(const idx_t &copies) const {return dtime_t(this->micros * copies);}; - inline dtime_t operator/(const idx_t &copies) const {return dtime_t(this->micros / copies);}; - inline int64_t operator-(const dtime_t &other) const {return this->micros - other.micros;}; - - // in-place operators - inline dtime_t &operator+=(const int64_t µs) {this->micros += micros; return *this;}; - inline dtime_t &operator-=(const int64_t µs) {this->micros -= micros; return *this;}; - inline dtime_t &operator+=(const dtime_t &other) {this->micros += other.micros; return *this;}; - - // special values - static inline dtime_t allballs() {return dtime_t(0); } // NOLINT -}; - -struct dtime_tz_t : public dtime_t {}; - -//! 
Type used to represent timestamps (seconds,microseconds,milliseconds or nanoseconds since 1970-01-01) -struct timestamp_t { // NOLINT - int64_t value; - - timestamp_t() = default; - explicit inline timestamp_t(int64_t value_p) : value(value_p) {} - inline timestamp_t& operator=(int64_t value_p) {value = value_p; return *this;} - - // explicit conversion - explicit inline operator int64_t() const {return value;} - - // comparison operators - inline bool operator==(const timestamp_t &rhs) const {return value == rhs.value;}; - inline bool operator!=(const timestamp_t &rhs) const {return value != rhs.value;}; - inline bool operator<=(const timestamp_t &rhs) const {return value <= rhs.value;}; - inline bool operator<(const timestamp_t &rhs) const {return value < rhs.value;}; - inline bool operator>(const timestamp_t &rhs) const {return value > rhs.value;}; - inline bool operator>=(const timestamp_t &rhs) const {return value >= rhs.value;}; - - // arithmetic operators - inline timestamp_t operator+(const double &value) const {return timestamp_t(this->value + int64_t(value));}; - inline int64_t operator-(const timestamp_t &other) const {return this->value - other.value;}; - - // in-place operators - inline timestamp_t &operator+=(const int64_t &value) {this->value += value; return *this;}; - inline timestamp_t &operator-=(const int64_t &value) {this->value -= value; return *this;}; - - // special values - static inline timestamp_t infinity() {return timestamp_t(std::numeric_limits::max()); } // NOLINT - static inline timestamp_t ninfinity() {return timestamp_t(-std::numeric_limits::max()); } // NOLINT - static inline timestamp_t epoch() {return timestamp_t(0); } // NOLINT -}; - -struct timestamp_tz_t : public timestamp_t {}; -struct timestamp_ns_t : public timestamp_t {}; -struct timestamp_ms_t : public timestamp_t {}; -struct timestamp_sec_t : public timestamp_t {}; - -struct interval_t { - int32_t months; - int32_t days; - int64_t micros; - - inline bool operator==(const interval_t &rhs) const { - return this->days == rhs.days && this->months == rhs.months && this->micros == rhs.micros; - } -}; struct hugeint_t { public: @@ -1248,65 +1130,12 @@ struct AggregateStateType { DUCKDB_API static const aggregate_state_t &GetStateType(const LogicalType &type); }; - DUCKDB_API string LogicalTypeIdToString(LogicalTypeId type); DUCKDB_API LogicalTypeId TransformStringToLogicalTypeId(const string &str); DUCKDB_API LogicalType TransformStringToLogicalType(const string &str); -//! 
Returns the PhysicalType for the given type -template -PhysicalType GetTypeId() { - if (std::is_same()) { - return PhysicalType::BOOL; - } else if (std::is_same()) { - return PhysicalType::INT8; - } else if (std::is_same()) { - return PhysicalType::INT16; - } else if (std::is_same()) { - return PhysicalType::INT32; - } else if (std::is_same()) { - return PhysicalType::INT64; - } else if (std::is_same()) { - return PhysicalType::UINT8; - } else if (std::is_same()) { - return PhysicalType::UINT16; - } else if (std::is_same()) { - return PhysicalType::UINT32; - } else if (std::is_same()) { - return PhysicalType::UINT64; - } else if (std::is_same()) { - return PhysicalType::INT128; - } else if (std::is_same()) { - return PhysicalType::INT32; - } else if (std::is_same()) { - return PhysicalType::INT64; - } else if (std::is_same()) { - return PhysicalType::INT64; - } else if (std::is_same()) { - return PhysicalType::FLOAT; - } else if (std::is_same()) { - return PhysicalType::DOUBLE; - } else if (std::is_same() || std::is_same() || std::is_same()) { - return PhysicalType::VARCHAR; - } else if (std::is_same()) { - return PhysicalType::INTERVAL; - } else { - return PhysicalType::INVALID; - } -} - -template -bool TypeIsNumber() { - return std::is_integral() || std::is_floating_point() || std::is_same(); -} - -template -bool IsValidType() { - return GetTypeId() != PhysicalType::INVALID; -} - //! The PhysicalType used by the row identifiers column extern const PhysicalType ROW_TYPE; @@ -1317,11 +1146,6 @@ bool TypeIsIntegral(PhysicalType type); bool TypeIsNumeric(PhysicalType type); bool TypeIsInteger(PhysicalType type); -template -bool IsIntegerType() { - return TypeIsIntegral(GetTypeId()); -} - bool ApproxEqual(float l, float r); bool ApproxEqual(double l, double r); @@ -1336,87 +1160,6 @@ struct aggregate_state_t { } // namespace duckdb -namespace std { - - //! Date - template <> - struct hash - { - std::size_t operator()(const duckdb::date_t& k) const - { - using std::hash; - return hash()((int32_t)k); - } - }; - - //! Time - template <> - struct hash - { - std::size_t operator()(const duckdb::dtime_t& k) const - { - using std::hash; - return hash()((int64_t)k); - } - }; - template <> - struct hash - { - std::size_t operator()(const duckdb::dtime_tz_t& k) const - { - using std::hash; - return hash()((int64_t)k); - } - }; - - //! 
Timestamp - template <> - struct hash - { - std::size_t operator()(const duckdb::timestamp_t& k) const - { - using std::hash; - return hash()((int64_t)k); - } - }; - template <> - struct hash - { - std::size_t operator()(const duckdb::timestamp_ms_t& k) const - { - using std::hash; - return hash()((int64_t)k); - } - }; - template <> - struct hash - { - std::size_t operator()(const duckdb::timestamp_ns_t& k) const - { - using std::hash; - return hash()((int64_t)k); - } - }; - template <> - struct hash - { - std::size_t operator()(const duckdb::timestamp_sec_t& k) const - { - using std::hash; - return hash()((int64_t)k); - } - }; - template <> - struct hash - { - std::size_t operator()(const duckdb::timestamp_tz_t& k) const - { - using std::hash; - return hash()((int64_t)k); - } - }; -} - namespace duckdb { @@ -2948,82 +2691,1003 @@ struct ValidityMask : public TemplatedValidityMask { +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/timestamp.hpp +// +// +//===----------------------------------------------------------------------===// -namespace duckdb { -class CastFunctionSet; -class Deserializer; -class Serializer; -struct GetCastFunctionInput; -//! The Value object holds a single arbitrary value of any type that can be -//! stored in the database. -class Value { - friend struct StringValue; - friend struct StructValue; - friend struct ListValue; - friend struct UnionValue; -public: - //! Create an empty NULL value of the specified type - DUCKDB_API explicit Value(LogicalType type = LogicalType::SQLNULL); - //! Create an INTEGER value - DUCKDB_API Value(int32_t val); // NOLINT: Allow implicit conversion from `int32_t` - //! Create a BIGINT value - DUCKDB_API Value(int64_t val); // NOLINT: Allow implicit conversion from `int64_t` - //! Create a FLOAT value - DUCKDB_API Value(float val); // NOLINT: Allow implicit conversion from `float` - //! Create a DOUBLE value - DUCKDB_API Value(double val); // NOLINT: Allow implicit conversion from `double` - //! Create a VARCHAR value - DUCKDB_API Value(const char *val); // NOLINT: Allow implicit conversion from `const char *` - //! Create a NULL value - DUCKDB_API Value(std::nullptr_t val); // NOLINT: Allow implicit conversion from `nullptr_t` - //! Create a VARCHAR value - DUCKDB_API Value(string_t val); // NOLINT: Allow implicit conversion from `string_t` - //! Create a VARCHAR value - DUCKDB_API Value(string val); // NOLINT: Allow implicit conversion from `string` - //! Copy constructor - DUCKDB_API Value(const Value &other); - //! Move constructor - DUCKDB_API Value(Value &&other) noexcept; - //! 
Destructor - DUCKDB_API ~Value(); +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/limits.hpp +// +// +//===----------------------------------------------------------------------===// - // copy assignment - DUCKDB_API Value &operator=(const Value &other); - // move assignment - DUCKDB_API Value &operator=(Value &&other) noexcept; - inline LogicalType &type() { - return type_; + + + +namespace duckdb { + +template +struct NumericLimits { + DUCKDB_API static T Minimum(); + DUCKDB_API static T Maximum(); + DUCKDB_API static bool IsSigned(); + DUCKDB_API static idx_t Digits(); +}; + +template <> +struct NumericLimits { + DUCKDB_API static int8_t Minimum(); + DUCKDB_API static int8_t Maximum(); + DUCKDB_API static bool IsSigned() { + return true; } - inline const LogicalType &type() const { - return type_; + DUCKDB_API static idx_t Digits() { + return 3; } - inline bool IsNull() const { - return is_null; +}; +template <> +struct NumericLimits { + DUCKDB_API static int16_t Minimum(); + DUCKDB_API static int16_t Maximum(); + DUCKDB_API static bool IsSigned() { + return true; } - - //! Create the lowest possible value of a given type (numeric only) - DUCKDB_API static Value MinimumValue(const LogicalType &type); - //! Create the highest possible value of a given type (numeric only) - DUCKDB_API static Value MaximumValue(const LogicalType &type); - //! Create a Numeric value of the specified type with the specified value - DUCKDB_API static Value Numeric(const LogicalType &type, int64_t value); - DUCKDB_API static Value Numeric(const LogicalType &type, hugeint_t value); - - //! Create a tinyint Value from a specified value - DUCKDB_API static Value BOOLEAN(int8_t value); - //! Create a tinyint Value from a specified value - DUCKDB_API static Value TINYINT(int8_t value); - //! Create a smallint Value from a specified value - DUCKDB_API static Value SMALLINT(int16_t value); - //! Create an integer Value from a specified value - DUCKDB_API static Value INTEGER(int32_t value); - //! Create a bigint Value from a specified value - DUCKDB_API static Value BIGINT(int64_t value); - //! 
Create an unsigned tinyint Value from a specified value + DUCKDB_API static idx_t Digits() { + return 5; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static int32_t Minimum(); + DUCKDB_API static int32_t Maximum(); + DUCKDB_API static bool IsSigned() { + return true; + } + DUCKDB_API static idx_t Digits() { + return 10; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static int64_t Minimum(); + DUCKDB_API static int64_t Maximum(); + DUCKDB_API static bool IsSigned() { + return true; + } + DUCKDB_API static idx_t Digits() { + return 19; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static hugeint_t Minimum(); + DUCKDB_API static hugeint_t Maximum(); + DUCKDB_API static bool IsSigned() { + return true; + } + DUCKDB_API static idx_t Digits() { + return 39; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static uint8_t Minimum(); + DUCKDB_API static uint8_t Maximum(); + DUCKDB_API static bool IsSigned() { + return false; + } + DUCKDB_API static idx_t Digits() { + return 3; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static uint16_t Minimum(); + DUCKDB_API static uint16_t Maximum(); + DUCKDB_API static bool IsSigned() { + return false; + } + DUCKDB_API static idx_t Digits() { + return 5; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static uint32_t Minimum(); + DUCKDB_API static uint32_t Maximum(); + DUCKDB_API static bool IsSigned() { + return false; + } + DUCKDB_API static idx_t Digits() { + return 10; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static uint64_t Minimum(); + DUCKDB_API static uint64_t Maximum(); + DUCKDB_API static bool IsSigned() { + return false; + } + DUCKDB_API static idx_t Digits() { + return 20; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static float Minimum(); + DUCKDB_API static float Maximum(); + DUCKDB_API static bool IsSigned() { + return true; + } + DUCKDB_API static idx_t Digits() { + return 127; + } +}; +template <> +struct NumericLimits { + DUCKDB_API static double Minimum(); + DUCKDB_API static double Maximum(); + DUCKDB_API static bool IsSigned() { + return true; + } + DUCKDB_API static idx_t Digits() { + return 250; + } +}; + +} // namespace duckdb + + + + +#include + +namespace duckdb { + +struct date_t; +struct dtime_t; + +//! 
Type used to represent timestamps (seconds,microseconds,milliseconds or nanoseconds since 1970-01-01) +struct timestamp_t { // NOLINT + int64_t value; + + timestamp_t() = default; + explicit inline timestamp_t(int64_t value_p) : value(value_p) { + } + inline timestamp_t &operator=(int64_t value_p) { + value = value_p; + return *this; + } + + // explicit conversion + explicit inline operator int64_t() const { + return value; + } + + // comparison operators + inline bool operator==(const timestamp_t &rhs) const { + return value == rhs.value; + }; + inline bool operator!=(const timestamp_t &rhs) const { + return value != rhs.value; + }; + inline bool operator<=(const timestamp_t &rhs) const { + return value <= rhs.value; + }; + inline bool operator<(const timestamp_t &rhs) const { + return value < rhs.value; + }; + inline bool operator>(const timestamp_t &rhs) const { + return value > rhs.value; + }; + inline bool operator>=(const timestamp_t &rhs) const { + return value >= rhs.value; + }; + + // arithmetic operators + inline timestamp_t operator+(const double &value) const { + return timestamp_t(this->value + int64_t(value)); + }; + inline int64_t operator-(const timestamp_t &other) const { + return this->value - other.value; + }; + + // in-place operators + inline timestamp_t &operator+=(const int64_t &value) { + this->value += value; + return *this; + }; + inline timestamp_t &operator-=(const int64_t &value) { + this->value -= value; + return *this; + }; + + // special values + static timestamp_t infinity() { + return timestamp_t(NumericLimits::Maximum()); + } // NOLINT + static timestamp_t ninfinity() { + return timestamp_t(-NumericLimits::Maximum()); + } // NOLINT + static inline timestamp_t epoch() { + return timestamp_t(0); + } // NOLINT +}; + +struct timestamp_tz_t : public timestamp_t {}; +struct timestamp_ns_t : public timestamp_t {}; +struct timestamp_ms_t : public timestamp_t {}; +struct timestamp_sec_t : public timestamp_t {}; + +//! The Timestamp class is a static class that holds helper functions for the Timestamp +//! type. +class Timestamp { +public: + // min timestamp is 290308-12-22 (BC) + constexpr static const int32_t MIN_YEAR = -290308; + constexpr static const int32_t MIN_MONTH = 12; + constexpr static const int32_t MIN_DAY = 22; + +public: + //! Convert a string in the format "YYYY-MM-DD hh:mm:ss[.f][-+TH[:tm]]" to a timestamp object + DUCKDB_API static timestamp_t FromString(const string &str); + //! Convert a string where the offset can also be a time zone string: / [A_Za-z0-9/_]+/ + //! If has_offset is true, then the result is an instant that was offset from UTC + //! If the tz is not empty, the result is still an instant, but the parts can be extracted and applied to the TZ + DUCKDB_API static bool TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, bool &has_offset, + string_t &tz); + DUCKDB_API static bool TryConvertTimestamp(const char *str, idx_t len, timestamp_t &result); + DUCKDB_API static timestamp_t FromCString(const char *str, idx_t len); + //! Convert a date object to a string in the format "YYYY-MM-DD hh:mm:ss" + DUCKDB_API static string ToString(timestamp_t timestamp); + + DUCKDB_API static date_t GetDate(timestamp_t timestamp); + + DUCKDB_API static dtime_t GetTime(timestamp_t timestamp); + //! Create a Timestamp object from a specified (date, time) combination + DUCKDB_API static timestamp_t FromDatetime(date_t date, dtime_t time); + DUCKDB_API static bool TryFromDatetime(date_t date, dtime_t time, timestamp_t &result); + + //! 
Is the timestamp finite or infinite? + static inline bool IsFinite(timestamp_t timestamp) { + return timestamp != timestamp_t::infinity() && timestamp != timestamp_t::ninfinity(); + } + + //! Extract the date and time from a given timestamp object + DUCKDB_API static void Convert(timestamp_t date, date_t &out_date, dtime_t &out_time); + //! Returns current timestamp + DUCKDB_API static timestamp_t GetCurrentTimestamp(); + + //! Convert the epoch (in sec) to a timestamp + DUCKDB_API static timestamp_t FromEpochSeconds(int64_t ms); + //! Convert the epoch (in ms) to a timestamp + DUCKDB_API static timestamp_t FromEpochMs(int64_t ms); + //! Convert the epoch (in microseconds) to a timestamp + DUCKDB_API static timestamp_t FromEpochMicroSeconds(int64_t micros); + //! Convert the epoch (in nanoseconds) to a timestamp + DUCKDB_API static timestamp_t FromEpochNanoSeconds(int64_t micros); + + //! Convert the epoch (in seconds) to a timestamp + DUCKDB_API static int64_t GetEpochSeconds(timestamp_t timestamp); + //! Convert the epoch (in ms) to a timestamp + DUCKDB_API static int64_t GetEpochMs(timestamp_t timestamp); + //! Convert a timestamp to epoch (in microseconds) + DUCKDB_API static int64_t GetEpochMicroSeconds(timestamp_t timestamp); + //! Convert a timestamp to epoch (in nanoseconds) + DUCKDB_API static int64_t GetEpochNanoSeconds(timestamp_t timestamp); + + DUCKDB_API static bool TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, + int &minute_offset); + + DUCKDB_API static string ConversionError(const string &str); + DUCKDB_API static string ConversionError(string_t str); +}; + +} // namespace duckdb + +namespace std { + +//! Timestamp +template <> +struct hash { + std::size_t operator()(const duckdb::timestamp_t &k) const { + using std::hash; + return hash()((int64_t)k); + } +}; +template <> +struct hash { + std::size_t operator()(const duckdb::timestamp_ms_t &k) const { + using std::hash; + return hash()((int64_t)k); + } +}; +template <> +struct hash { + std::size_t operator()(const duckdb::timestamp_ns_t &k) const { + using std::hash; + return hash()((int64_t)k); + } +}; +template <> +struct hash { + std::size_t operator()(const duckdb::timestamp_sec_t &k) const { + using std::hash; + return hash()((int64_t)k); + } +}; +template <> +struct hash { + std::size_t operator()(const duckdb::timestamp_tz_t &k) const { + using std::hash; + return hash()((int64_t)k); + } +}; +} // namespace std + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/date.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/string_type.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + + +#include + +namespace duckdb { + +struct string_t { + friend struct StringComparisonOperators; + friend class StringSegment; + +public: + static constexpr idx_t PREFIX_BYTES = 4 * sizeof(char); + static constexpr idx_t INLINE_BYTES = 12 * sizeof(char); + static constexpr idx_t HEADER_SIZE = sizeof(uint32_t) + PREFIX_BYTES; +#ifndef DUCKDB_DEBUG_NO_INLINE + static constexpr idx_t PREFIX_LENGTH = PREFIX_BYTES; + static constexpr idx_t INLINE_LENGTH = INLINE_BYTES; +#else + static constexpr idx_t PREFIX_LENGTH = 0; + static constexpr idx_t INLINE_LENGTH = 0; +#endif + + string_t() = default; + explicit 
string_t(uint32_t len) { + value.inlined.length = len; + } + string_t(const char *data, uint32_t len) { + value.inlined.length = len; + D_ASSERT(data || GetSize() == 0); + if (IsInlined()) { + // zero initialize the prefix first + // this makes sure that strings with length smaller than 4 still have an equal prefix + memset(value.inlined.inlined, 0, INLINE_BYTES); + if (GetSize() == 0) { + return; + } + // small string: inlined + memcpy(value.inlined.inlined, data, GetSize()); + } else { + // large string: store pointer +#ifndef DUCKDB_DEBUG_NO_INLINE + memcpy(value.pointer.prefix, data, PREFIX_LENGTH); +#else + memset(value.pointer.prefix, 0, PREFIX_BYTES); +#endif + value.pointer.ptr = (char *)data; + } + } + string_t(const char *data) : string_t(data, strlen(data)) { // NOLINT: Allow implicit conversion from `const char*` + } + string_t(const string &value) + : string_t(value.c_str(), value.size()) { // NOLINT: Allow implicit conversion from `const char*` + } + + bool IsInlined() const { + return GetSize() <= INLINE_LENGTH; + } + + //! this is unsafe since the string will not be terminated at the end + const char *GetDataUnsafe() const { + return IsInlined() ? (const char *)value.inlined.inlined : value.pointer.ptr; + } + + char *GetDataWriteable() const { + return IsInlined() ? (char *)value.inlined.inlined : value.pointer.ptr; + } + + const char *GetPrefix() const { + return value.pointer.prefix; + } + + idx_t GetSize() const { + return value.inlined.length; + } + + string GetString() const { + return string(GetDataUnsafe(), GetSize()); + } + + explicit operator string() const { + return GetString(); + } + + void Finalize() { + // set trailing NULL byte + if (GetSize() <= INLINE_LENGTH) { + // fill prefix with zeros if the length is smaller than the prefix length + for (idx_t i = GetSize(); i < INLINE_BYTES; i++) { + value.inlined.inlined[i] = '\0'; + } + } else { + // copy the data into the prefix +#ifndef DUCKDB_DEBUG_NO_INLINE + auto dataptr = (char *)GetDataUnsafe(); + memcpy(value.pointer.prefix, dataptr, PREFIX_LENGTH); +#else + memset(value.pointer.prefix, 0, PREFIX_BYTES); +#endif + } + } + + void Verify() const; + void VerifyNull() const; + bool operator<(const string_t &r) const { + auto this_str = this->GetString(); + auto r_str = r.GetString(); + return this_str < r_str; + } + +private: + union { + struct { + uint32_t length; + char prefix[4]; + char *ptr; + } pointer; + struct { + uint32_t length; + char inlined[12]; + } inlined; + } value; +}; + +} // namespace duckdb + + + +#include + +namespace duckdb { + +struct timestamp_t; + +//! 
Type used to represent dates (days since 1970-01-01) +struct date_t { // NOLINT + int32_t days; + + date_t() = default; + explicit inline date_t(int32_t days_p) : days(days_p) { + } + + // explicit conversion + explicit inline operator int32_t() const { + return days; + } + + // comparison operators + inline bool operator==(const date_t &rhs) const { + return days == rhs.days; + }; + inline bool operator!=(const date_t &rhs) const { + return days != rhs.days; + }; + inline bool operator<=(const date_t &rhs) const { + return days <= rhs.days; + }; + inline bool operator<(const date_t &rhs) const { + return days < rhs.days; + }; + inline bool operator>(const date_t &rhs) const { + return days > rhs.days; + }; + inline bool operator>=(const date_t &rhs) const { + return days >= rhs.days; + }; + + // arithmetic operators + inline date_t operator+(const int32_t &days) const { + return date_t(this->days + days); + }; + inline date_t operator-(const int32_t &days) const { + return date_t(this->days - days); + }; + + // in-place operators + inline date_t &operator+=(const int32_t &days) { + this->days += days; + return *this; + }; + inline date_t &operator-=(const int32_t &days) { + this->days -= days; + return *this; + }; + + // special values + static inline date_t infinity() { + return date_t(NumericLimits::Maximum()); + } // NOLINT + static inline date_t ninfinity() { + return date_t(-NumericLimits::Maximum()); + } // NOLINT + static inline date_t epoch() { + return date_t(0); + } // NOLINT +}; + +//! The Date class is a static class that holds helper functions for the Date type. +class Date { +public: + static const char *PINF; // NOLINT + static const char *NINF; // NOLINT + static const char *EPOCH; // NOLINT + + static const string_t MONTH_NAMES[12]; + static const string_t MONTH_NAMES_ABBREVIATED[12]; + static const string_t DAY_NAMES[7]; + static const string_t DAY_NAMES_ABBREVIATED[7]; + static const int32_t NORMAL_DAYS[13]; + static const int32_t CUMULATIVE_DAYS[13]; + static const int32_t LEAP_DAYS[13]; + static const int32_t CUMULATIVE_LEAP_DAYS[13]; + static const int32_t CUMULATIVE_YEAR_DAYS[401]; + static const int8_t MONTH_PER_DAY_OF_YEAR[365]; + static const int8_t LEAP_MONTH_PER_DAY_OF_YEAR[366]; + + // min date is 5877642-06-25 (BC) (-2^31+2) + constexpr static const int32_t DATE_MIN_YEAR = -5877641; + constexpr static const int32_t DATE_MIN_MONTH = 6; + constexpr static const int32_t DATE_MIN_DAY = 25; + // max date is 5881580-07-10 (2^31-2) + constexpr static const int32_t DATE_MAX_YEAR = 5881580; + constexpr static const int32_t DATE_MAX_MONTH = 7; + constexpr static const int32_t DATE_MAX_DAY = 10; + constexpr static const int32_t EPOCH_YEAR = 1970; + + constexpr static const int32_t YEAR_INTERVAL = 400; + constexpr static const int32_t DAYS_PER_YEAR_INTERVAL = 146097; + +public: + //! Convert a string in the format "YYYY-MM-DD" to a date object + DUCKDB_API static date_t FromString(const string &str, bool strict = false); + //! Convert a string in the format "YYYY-MM-DD" to a date object + DUCKDB_API static date_t FromCString(const char *str, idx_t len, bool strict = false); + //! Convert a date object to a string in the format "YYYY-MM-DD" + DUCKDB_API static string ToString(date_t date); + //! Try to convert text in a buffer to a date; returns true if parsing was successful + //! If the date was a "special" value, the special flag will be set. 
+ DUCKDB_API static bool TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special, + bool strict = false); + + //! Create a string "YYYY-MM-DD" from a specified (year, month, day) + //! combination + DUCKDB_API static string Format(int32_t year, int32_t month, int32_t day); + + //! Extract the year, month and day from a given date object + DUCKDB_API static void Convert(date_t date, int32_t &out_year, int32_t &out_month, int32_t &out_day); + //! Create a Date object from a specified (year, month, day) combination + DUCKDB_API static date_t FromDate(int32_t year, int32_t month, int32_t day); + DUCKDB_API static bool TryFromDate(int32_t year, int32_t month, int32_t day, date_t &result); + + //! Returns true if (year) is a leap year, and false otherwise + DUCKDB_API static bool IsLeapYear(int32_t year); + + //! Returns true if the specified (year, month, day) combination is a valid + //! date + DUCKDB_API static bool IsValid(int32_t year, int32_t month, int32_t day); + + //! Returns true if the specified date is finite + static inline bool IsFinite(date_t date) { + return date != date_t::infinity() && date != date_t::ninfinity(); + } + + //! The max number of days in a month of a given year + DUCKDB_API static int32_t MonthDays(int32_t year, int32_t month); + + //! Extract the epoch from the date (seconds since 1970-01-01) + DUCKDB_API static int64_t Epoch(date_t date); + //! Extract the epoch from the date (nanoseconds since 1970-01-01) + DUCKDB_API static int64_t EpochNanoseconds(date_t date); + //! Extract the epoch from the date (microseconds since 1970-01-01) + DUCKDB_API static int64_t EpochMicroseconds(date_t date); + //! Convert the epoch (seconds since 1970-01-01) to a date_t + DUCKDB_API static date_t EpochToDate(int64_t epoch); + + //! Extract the number of days since epoch (days since 1970-01-01) + DUCKDB_API static int32_t EpochDays(date_t date); + //! Convert the epoch number of days to a date_t + DUCKDB_API static date_t EpochDaysToDate(int32_t epoch); + + //! Extract year of a date entry + DUCKDB_API static int32_t ExtractYear(date_t date); + //! Extract year of a date entry, but optimized to first try the last year found + DUCKDB_API static int32_t ExtractYear(date_t date, int32_t *last_year); + DUCKDB_API static int32_t ExtractYear(timestamp_t ts, int32_t *last_year); + //! Extract month of a date entry + DUCKDB_API static int32_t ExtractMonth(date_t date); + //! Extract day of a date entry + DUCKDB_API static int32_t ExtractDay(date_t date); + //! Extract the day of the week (1-7) + DUCKDB_API static int32_t ExtractISODayOfTheWeek(date_t date); + //! Extract the day of the year + DUCKDB_API static int32_t ExtractDayOfTheYear(date_t date); + //! Extract the ISO week number + //! ISO weeks start on Monday and the first week of a year + //! contains January 4 of that year. + //! In the ISO week-numbering system, it is possible for early-January dates + //! to be part of the 52nd or 53rd week of the previous year. + DUCKDB_API static void ExtractISOYearWeek(date_t date, int32_t &year, int32_t &week); + DUCKDB_API static int32_t ExtractISOWeekNumber(date_t date); + DUCKDB_API static int32_t ExtractISOYearNumber(date_t date); + //! Extract the week number as Python handles it. + //! Either Monday or Sunday is the first day of the week, + //! and any date before the first Monday/Sunday returns week 0 + //! 
This is a bit more consistent because week numbers in a year are always incrementing + DUCKDB_API static int32_t ExtractWeekNumberRegular(date_t date, bool monday_first = true); + //! Returns the date of the monday of the current week. + DUCKDB_API static date_t GetMondayOfCurrentWeek(date_t date); + + //! Helper function to parse two digits from a string (e.g. "30" -> 30, "03" -> 3, "3" -> 3) + DUCKDB_API static bool ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result); + + DUCKDB_API static string ConversionError(const string &str); + DUCKDB_API static string ConversionError(string_t str); + +private: + static void ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset); +}; + +} // namespace duckdb + +namespace std { + +//! Date +template <> +struct hash { + std::size_t operator()(const duckdb::date_t &k) const { + using std::hash; + return hash()((int32_t)k); + } +}; +} // namespace std + + + + + +#include + +namespace duckdb { + +//! Type used to represent time (microseconds) +struct dtime_t { // NOLINT + int64_t micros; + + dtime_t() = default; + explicit inline dtime_t(int64_t micros_p) : micros(micros_p) { + } + inline dtime_t &operator=(int64_t micros_p) { + micros = micros_p; + return *this; + } + + // explicit conversion + explicit inline operator int64_t() const { + return micros; + } + explicit inline operator double() const { + return micros; + } + + // comparison operators + inline bool operator==(const dtime_t &rhs) const { + return micros == rhs.micros; + }; + inline bool operator!=(const dtime_t &rhs) const { + return micros != rhs.micros; + }; + inline bool operator<=(const dtime_t &rhs) const { + return micros <= rhs.micros; + }; + inline bool operator<(const dtime_t &rhs) const { + return micros < rhs.micros; + }; + inline bool operator>(const dtime_t &rhs) const { + return micros > rhs.micros; + }; + inline bool operator>=(const dtime_t &rhs) const { + return micros >= rhs.micros; + }; + + // arithmetic operators + inline dtime_t operator+(const int64_t µs) const { + return dtime_t(this->micros + micros); + }; + inline dtime_t operator+(const double µs) const { + return dtime_t(this->micros + int64_t(micros)); + }; + inline dtime_t operator-(const int64_t µs) const { + return dtime_t(this->micros - micros); + }; + inline dtime_t operator*(const idx_t &copies) const { + return dtime_t(this->micros * copies); + }; + inline dtime_t operator/(const idx_t &copies) const { + return dtime_t(this->micros / copies); + }; + inline int64_t operator-(const dtime_t &other) const { + return this->micros - other.micros; + }; + + // in-place operators + inline dtime_t &operator+=(const int64_t µs) { + this->micros += micros; + return *this; + }; + inline dtime_t &operator-=(const int64_t µs) { + this->micros -= micros; + return *this; + }; + inline dtime_t &operator+=(const dtime_t &other) { + this->micros += other.micros; + return *this; + }; + + // special values + static inline dtime_t allballs() { + return dtime_t(0); + } // NOLINT +}; + +struct dtime_tz_t : public dtime_t {}; + +} // namespace duckdb + +namespace std { + +//! 
Time +template <> +struct hash { + std::size_t operator()(const duckdb::dtime_t &k) const { + using std::hash; + return hash()((int64_t)k); + } +}; +template <> +struct hash { + std::size_t operator()(const duckdb::dtime_tz_t &k) const { + using std::hash; + return hash()((int64_t)k); + } +}; +} // namespace std + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/common/types/interval.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +namespace duckdb { + +struct dtime_t; +struct date_t; +struct timestamp_t; + +struct interval_t { + int32_t months; + int32_t days; + int64_t micros; + + inline bool operator==(const interval_t &rhs) const { + return this->days == rhs.days && this->months == rhs.months && this->micros == rhs.micros; + } +}; + +//! The Interval class is a static class that holds helper functions for the Interval +//! type. +class Interval { +public: + static constexpr const int32_t MONTHS_PER_MILLENIUM = 12000; + static constexpr const int32_t MONTHS_PER_CENTURY = 1200; + static constexpr const int32_t MONTHS_PER_DECADE = 120; + static constexpr const int32_t MONTHS_PER_YEAR = 12; + static constexpr const int32_t MONTHS_PER_QUARTER = 3; + static constexpr const int32_t DAYS_PER_WEEK = 7; + //! only used for interval comparison/ordering purposes, in which case a month counts as 30 days + static constexpr const int64_t DAYS_PER_MONTH = 30; + static constexpr const int64_t DAYS_PER_YEAR = 365; + static constexpr const int64_t MSECS_PER_SEC = 1000; + static constexpr const int32_t SECS_PER_MINUTE = 60; + static constexpr const int32_t MINS_PER_HOUR = 60; + static constexpr const int32_t HOURS_PER_DAY = 24; + static constexpr const int32_t SECS_PER_HOUR = SECS_PER_MINUTE * MINS_PER_HOUR; + static constexpr const int32_t SECS_PER_DAY = SECS_PER_HOUR * HOURS_PER_DAY; + static constexpr const int32_t SECS_PER_WEEK = SECS_PER_DAY * DAYS_PER_WEEK; + + static constexpr const int64_t MICROS_PER_MSEC = 1000; + static constexpr const int64_t MICROS_PER_SEC = MICROS_PER_MSEC * MSECS_PER_SEC; + static constexpr const int64_t MICROS_PER_MINUTE = MICROS_PER_SEC * SECS_PER_MINUTE; + static constexpr const int64_t MICROS_PER_HOUR = MICROS_PER_MINUTE * MINS_PER_HOUR; + static constexpr const int64_t MICROS_PER_DAY = MICROS_PER_HOUR * HOURS_PER_DAY; + static constexpr const int64_t MICROS_PER_WEEK = MICROS_PER_DAY * DAYS_PER_WEEK; + static constexpr const int64_t MICROS_PER_MONTH = MICROS_PER_DAY * DAYS_PER_MONTH; + + static constexpr const int64_t NANOS_PER_MICRO = 1000; + static constexpr const int64_t NANOS_PER_MSEC = NANOS_PER_MICRO * MICROS_PER_MSEC; + static constexpr const int64_t NANOS_PER_SEC = NANOS_PER_MSEC * MSECS_PER_SEC; + static constexpr const int64_t NANOS_PER_MINUTE = NANOS_PER_SEC * SECS_PER_MINUTE; + static constexpr const int64_t NANOS_PER_HOUR = NANOS_PER_MINUTE * MINS_PER_HOUR; + static constexpr const int64_t NANOS_PER_DAY = NANOS_PER_HOUR * HOURS_PER_DAY; + static constexpr const int64_t NANOS_PER_WEEK = NANOS_PER_DAY * DAYS_PER_WEEK; + +public: + //! Convert a string to an interval object + static bool FromString(const string &str, interval_t &result); + //! Convert a string to an interval object + static bool FromCString(const char *str, idx_t len, interval_t &result, string *error_message, bool strict); + //! Convert an interval object to a string + static string ToString(const interval_t &val); + + //! 
Convert microseconds to a normalised interval
+	DUCKDB_API static interval_t FromMicro(int64_t micros);
+
+	//! Get Interval in milliseconds
+	static int64_t GetMilli(const interval_t &val);
+
+	//! Get Interval in microseconds
+	static int64_t GetMicro(const interval_t &val);
+
+	//! Get Interval in Nanoseconds
+	static int64_t GetNanoseconds(const interval_t &val);
+
+	//! Returns the age between two timestamps (including 30 day months)
+	static interval_t GetAge(timestamp_t timestamp_1, timestamp_t timestamp_2);
+
+	//! Returns the exact difference between two timestamps (days and seconds)
+	static interval_t GetDifference(timestamp_t timestamp_1, timestamp_t timestamp_2);
+
+	//! Returns the inverted interval
+	static interval_t Invert(interval_t interval);
+
+	//! Add an interval to a date
+	static date_t Add(date_t left, interval_t right);
+	//! Add an interval to a timestamp
+	static timestamp_t Add(timestamp_t left, interval_t right);
+	//! Add an interval to a time. In case the time overflows or underflows, modify the date by the overflow.
+	//! For example if we go from 23:00 to 02:00, we add a day to the date
+	static dtime_t Add(dtime_t left, interval_t right, date_t &date);
+
+	//! Comparison operators
+	static bool Equals(interval_t left, interval_t right);
+	static bool GreaterThan(interval_t left, interval_t right);
+	static bool GreaterThanEquals(interval_t left, interval_t right);
+};
+} // namespace duckdb
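// --------------------------------------------------------------------------
// Editorial sketch (not part of the upstream patch): the date-rollover rule
// documented for Interval::Add above, under a hypothetical guard macro
// (DUCKDB_RS_DOC_EXAMPLE) so it is never compiled into the amalgamation:
#ifdef DUCKDB_RS_DOC_EXAMPLE
static void IntervalAddRolloverSketch() {
	using namespace duckdb;
	interval_t three_hours {0, 0, 3 * Interval::MICROS_PER_HOUR};
	date_t day = Date::FromDate(2022, 12, 11);
	dtime_t at_2300 = dtime_t(23 * Interval::MICROS_PER_HOUR);
	// 23:00 + 3h crosses midnight, so Add() bumps `day` to 2022-12-12
	dtime_t at_0200 = Interval::Add(at_2300, three_hours, day);
	(void)at_0200; // 02:00
}
#endif
// --------------------------------------------------------------------------
+
+
+namespace duckdb {
+
+class CastFunctionSet;
+class Deserializer;
+class Serializer;
+struct GetCastFunctionInput;
+
+//! The Value object holds a single arbitrary value of any type that can be
+//! stored in the database.
+class Value {
+	friend struct StringValue;
+	friend struct StructValue;
+	friend struct ListValue;
+	friend struct UnionValue;
+
+public:
+	//! Create an empty NULL value of the specified type
+	DUCKDB_API explicit Value(LogicalType type = LogicalType::SQLNULL);
+	//! Create an INTEGER value
+	DUCKDB_API Value(int32_t val); // NOLINT: Allow implicit conversion from `int32_t`
+	//! Create a BIGINT value
+	DUCKDB_API Value(int64_t val); // NOLINT: Allow implicit conversion from `int64_t`
+	//! Create a FLOAT value
+	DUCKDB_API Value(float val); // NOLINT: Allow implicit conversion from `float`
+	//! Create a DOUBLE value
+	DUCKDB_API Value(double val); // NOLINT: Allow implicit conversion from `double`
+	//! Create a VARCHAR value
+	DUCKDB_API Value(const char *val); // NOLINT: Allow implicit conversion from `const char *`
+	//! Create a NULL value
+	DUCKDB_API Value(std::nullptr_t val); // NOLINT: Allow implicit conversion from `nullptr_t`
+	//! Create a VARCHAR value
+	DUCKDB_API Value(string_t val); // NOLINT: Allow implicit conversion from `string_t`
+	//! Create a VARCHAR value
+	DUCKDB_API Value(string val); // NOLINT: Allow implicit conversion from `string`
+	//! Copy constructor
+	DUCKDB_API Value(const Value &other);
+	//! Move constructor
+	DUCKDB_API Value(Value &&other) noexcept;
+	//! Destructor
+	DUCKDB_API ~Value();
+
+	// copy assignment
+	DUCKDB_API Value &operator=(const Value &other);
+	// move assignment
+	DUCKDB_API Value &operator=(Value &&other) noexcept;
+
+	inline LogicalType &type() {
+		return type_;
+	}
+	inline const LogicalType &type() const {
+		return type_;
+	}
+	inline bool IsNull() const {
+		return is_null;
+	}
+
+	//! Create the lowest possible value of a given type (numeric only)
+	DUCKDB_API static Value MinimumValue(const LogicalType &type);
+	//! 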
Create the highest possible value of a given type (numeric only) + DUCKDB_API static Value MaximumValue(const LogicalType &type); + //! Create a Numeric value of the specified type with the specified value + DUCKDB_API static Value Numeric(const LogicalType &type, int64_t value); + DUCKDB_API static Value Numeric(const LogicalType &type, hugeint_t value); + + //! Create a tinyint Value from a specified value + DUCKDB_API static Value BOOLEAN(int8_t value); + //! Create a tinyint Value from a specified value + DUCKDB_API static Value TINYINT(int8_t value); + //! Create a smallint Value from a specified value + DUCKDB_API static Value SMALLINT(int16_t value); + //! Create an integer Value from a specified value + DUCKDB_API static Value INTEGER(int32_t value); + //! Create a bigint Value from a specified value + DUCKDB_API static Value BIGINT(int64_t value); + //! Create an unsigned tinyint Value from a specified value DUCKDB_API static Value UTINYINT(uint8_t value); //! Create an unsigned smallint Value from a specified value DUCKDB_API static Value USMALLINT(uint16_t value); @@ -3611,6 +4275,9 @@ class AllocatedData { }; class Allocator { + // 281TB ought to be enough for anybody + static constexpr const idx_t MAXIMUM_ALLOC_SIZE = 281474976710656ULL; + public: DUCKDB_API Allocator(); DUCKDB_API Allocator(allocate_function_ptr_t allocate_function_p, free_function_ptr_t free_function_p, @@ -3683,6 +4350,7 @@ void DestroyObject(T *ptr) { //! As such this class should be used primarily for larger allocations. struct BufferAllocator { DUCKDB_API static Allocator &Get(ClientContext &context); + DUCKDB_API static Allocator &Get(DatabaseInstance &db); }; } // namespace duckdb @@ -3717,187 +4385,52 @@ class ArenaAllocator { ArenaChunk *GetHead(); ArenaChunk *GetTail(); - bool IsEmpty(); - -private: - //! Internal allocator that is used by the arena allocator - Allocator &allocator; - idx_t current_capacity; - unique_ptr head; - ArenaChunk *tail; -}; - -} // namespace duckdb - - -namespace duckdb { -//! A string heap is the owner of a set of strings, strings can be inserted into -//! it On every insert, a pointer to the inserted string is returned The -//! returned pointer will remain valid until the StringHeap is destroyed -class StringHeap { -public: - StringHeap(); - - void Destroy(); - void Move(StringHeap &other); - - //! Add a string to the string heap, returns a pointer to the string - string_t AddString(const char *data, idx_t len); - //! Add a string to the string heap, returns a pointer to the string - string_t AddString(const char *data); - //! Add a string to the string heap, returns a pointer to the string - string_t AddString(const string &data); - //! Add a string to the string heap, returns a pointer to the string - string_t AddString(const string_t &data); - //! Add a blob to the string heap; blobs can be non-valid UTF8 - string_t AddBlob(const string_t &data); - //! Add a blob to the string heap; blobs can be non-valid UTF8 - string_t AddBlob(const char *data, idx_t len); - //! 
Allocates space for an empty string of size "len" on the heap - string_t EmptyString(idx_t len); - -private: - ArenaAllocator allocator; -}; - -} // namespace duckdb - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/string_type.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - -#include - -namespace duckdb { - -struct string_t { - friend struct StringComparisonOperators; - friend class StringSegment; - -public: - static constexpr idx_t PREFIX_BYTES = 4 * sizeof(char); - static constexpr idx_t INLINE_BYTES = 12 * sizeof(char); - static constexpr idx_t HEADER_SIZE = sizeof(uint32_t) + PREFIX_BYTES; -#ifndef DUCKDB_DEBUG_NO_INLINE - static constexpr idx_t PREFIX_LENGTH = PREFIX_BYTES; - static constexpr idx_t INLINE_LENGTH = INLINE_BYTES; -#else - static constexpr idx_t PREFIX_LENGTH = 0; - static constexpr idx_t INLINE_LENGTH = 0; -#endif - - string_t() = default; - explicit string_t(uint32_t len) { - value.inlined.length = len; - } - string_t(const char *data, uint32_t len) { - value.inlined.length = len; - D_ASSERT(data || GetSize() == 0); - if (IsInlined()) { - // zero initialize the prefix first - // this makes sure that strings with length smaller than 4 still have an equal prefix - memset(value.inlined.inlined, 0, INLINE_BYTES); - if (GetSize() == 0) { - return; - } - // small string: inlined - memcpy(value.inlined.inlined, data, GetSize()); - } else { - // large string: store pointer -#ifndef DUCKDB_DEBUG_NO_INLINE - memcpy(value.pointer.prefix, data, PREFIX_LENGTH); -#else - memset(value.pointer.prefix, 0, PREFIX_BYTES); -#endif - value.pointer.ptr = (char *)data; - } - } - string_t(const char *data) : string_t(data, strlen(data)) { // NOLINT: Allow implicit conversion from `const char*` - } - string_t(const string &value) - : string_t(value.c_str(), value.size()) { // NOLINT: Allow implicit conversion from `const char*` - } - - bool IsInlined() const { - return GetSize() <= INLINE_LENGTH; - } - - //! this is unsafe since the string will not be terminated at the end - const char *GetDataUnsafe() const { - return IsInlined() ? (const char *)value.inlined.inlined : value.pointer.ptr; - } - - char *GetDataWriteable() const { - return IsInlined() ? (char *)value.inlined.inlined : value.pointer.ptr; - } - - const char *GetPrefix() const { - return value.pointer.prefix; - } + bool IsEmpty(); - idx_t GetSize() const { - return value.inlined.length; - } +private: + //! Internal allocator that is used by the arena allocator + Allocator &allocator; + idx_t current_capacity; + unique_ptr head; + ArenaChunk *tail; +}; - string GetString() const { - return string(GetDataUnsafe(), GetSize()); - } +} // namespace duckdb - explicit operator string() const { - return GetString(); - } - void Finalize() { - // set trailing NULL byte - if (GetSize() <= INLINE_LENGTH) { - // fill prefix with zeros if the length is smaller than the prefix length - for (idx_t i = GetSize(); i < INLINE_BYTES; i++) { - value.inlined.inlined[i] = '\0'; - } - } else { - // copy the data into the prefix -#ifndef DUCKDB_DEBUG_NO_INLINE - auto dataptr = (char *)GetDataUnsafe(); - memcpy(value.pointer.prefix, dataptr, PREFIX_LENGTH); -#else - memset(value.pointer.prefix, 0, PREFIX_BYTES); -#endif - } - } +namespace duckdb { +//! A string heap is the owner of a set of strings, strings can be inserted into +//! it On every insert, a pointer to the inserted string is returned The +//! 
returned pointer will remain valid until the StringHeap is destroyed +class StringHeap { +public: + StringHeap(Allocator &allocator = Allocator::DefaultAllocator()); - void Verify() const; - void VerifyNull() const; - bool operator<(const string_t &r) const { - auto this_str = this->GetString(); - auto r_str = r.GetString(); - return this_str < r_str; - } + void Destroy(); + void Move(StringHeap &other); + + //! Add a string to the string heap, returns a pointer to the string + string_t AddString(const char *data, idx_t len); + //! Add a string to the string heap, returns a pointer to the string + string_t AddString(const char *data); + //! Add a string to the string heap, returns a pointer to the string + string_t AddString(const string &data); + //! Add a string to the string heap, returns a pointer to the string + string_t AddString(const string_t &data); + //! Add a blob to the string heap; blobs can be non-valid UTF8 + string_t AddBlob(const string_t &data); + //! Add a blob to the string heap; blobs can be non-valid UTF8 + string_t AddBlob(const char *data, idx_t len); + //! Allocates space for an empty string of size "len" on the heap + string_t EmptyString(idx_t len); private: - union { - struct { - uint32_t length; - char prefix[4]; - char *ptr; - } pointer; - struct { - uint32_t length; - char inlined[12]; - } inlined; - } value; + ArenaAllocator allocator; }; } // namespace duckdb + //===----------------------------------------------------------------------===// // DuckDB // @@ -7453,223 +7986,147 @@ struct ExpressionState { public: void AddChild(Expression *expr); - void Finalize(); - Allocator &GetAllocator(); - bool HasContext(); - ClientContext &GetContext(); - - void Verify(ExpressionExecutorState &root); -}; - -struct ExecuteFunctionState : public ExpressionState { - ExecuteFunctionState(const Expression &expr, ExpressionExecutorState &root); - ~ExecuteFunctionState(); - - unique_ptr local_state; - -public: - static FunctionLocalState *GetFunctionState(ExpressionState &state) { - return ((ExecuteFunctionState &)state).local_state.get(); - } -}; - -struct ExpressionExecutorState { - explicit ExpressionExecutorState(const string &name); - - unique_ptr root_state; - ExpressionExecutor *executor = nullptr; - CycleCounter profiler; - string name; - - void Verify(); -}; - -} // namespace duckdb - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/storage/statistics/base_statistics.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/operator/comparison_operators.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/hugeint.hpp -// -// -//===----------------------------------------------------------------------===// - - - - -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/limits.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - -namespace duckdb { - -template -struct NumericLimits { - DUCKDB_API static T Minimum(); - DUCKDB_API static T Maximum(); - DUCKDB_API static bool IsSigned(); - DUCKDB_API static idx_t Digits(); -}; - -template <> -struct NumericLimits { - DUCKDB_API 
static int8_t Minimum(); - DUCKDB_API static int8_t Maximum(); - DUCKDB_API static bool IsSigned() { - return true; - } - DUCKDB_API static idx_t Digits() { - return 3; - } -}; -template <> -struct NumericLimits { - DUCKDB_API static int16_t Minimum(); - DUCKDB_API static int16_t Maximum(); - DUCKDB_API static bool IsSigned() { - return true; - } - DUCKDB_API static idx_t Digits() { - return 5; - } -}; -template <> -struct NumericLimits { - DUCKDB_API static int32_t Minimum(); - DUCKDB_API static int32_t Maximum(); - DUCKDB_API static bool IsSigned() { - return true; - } - DUCKDB_API static idx_t Digits() { - return 10; - } -}; -template <> -struct NumericLimits { - DUCKDB_API static int64_t Minimum(); - DUCKDB_API static int64_t Maximum(); - DUCKDB_API static bool IsSigned() { - return true; - } - DUCKDB_API static idx_t Digits() { - return 19; - } -}; -template <> -struct NumericLimits { - DUCKDB_API static hugeint_t Minimum(); - DUCKDB_API static hugeint_t Maximum(); - DUCKDB_API static bool IsSigned() { - return true; - } - DUCKDB_API static idx_t Digits() { - return 39; - } -}; -template <> -struct NumericLimits { - DUCKDB_API static uint8_t Minimum(); - DUCKDB_API static uint8_t Maximum(); - DUCKDB_API static bool IsSigned() { - return false; - } - DUCKDB_API static idx_t Digits() { - return 3; - } -}; -template <> -struct NumericLimits { - DUCKDB_API static uint16_t Minimum(); - DUCKDB_API static uint16_t Maximum(); - DUCKDB_API static bool IsSigned() { - return false; - } - DUCKDB_API static idx_t Digits() { - return 5; - } -}; -template <> -struct NumericLimits { - DUCKDB_API static uint32_t Minimum(); - DUCKDB_API static uint32_t Maximum(); - DUCKDB_API static bool IsSigned() { - return false; - } - DUCKDB_API static idx_t Digits() { - return 10; - } + void Finalize(); + Allocator &GetAllocator(); + bool HasContext(); + ClientContext &GetContext(); + + void Verify(ExpressionExecutorState &root); }; -template <> -struct NumericLimits { - DUCKDB_API static uint64_t Minimum(); - DUCKDB_API static uint64_t Maximum(); - DUCKDB_API static bool IsSigned() { - return false; - } - DUCKDB_API static idx_t Digits() { - return 20; + +struct ExecuteFunctionState : public ExpressionState { + ExecuteFunctionState(const Expression &expr, ExpressionExecutorState &root); + ~ExecuteFunctionState(); + + unique_ptr local_state; + +public: + static FunctionLocalState *GetFunctionState(ExpressionState &state) { + return ((ExecuteFunctionState &)state).local_state.get(); } }; -template <> -struct NumericLimits { - DUCKDB_API static float Minimum(); - DUCKDB_API static float Maximum(); - DUCKDB_API static bool IsSigned() { - return true; - } - DUCKDB_API static idx_t Digits() { - return 127; - } + +struct ExpressionExecutorState { + explicit ExpressionExecutorState(const string &name); + + unique_ptr root_state; + ExpressionExecutor *executor = nullptr; + CycleCounter profiler; + string name; + + void Verify(); }; -template <> -struct NumericLimits { - DUCKDB_API static double Minimum(); - DUCKDB_API static double Maximum(); - DUCKDB_API static bool IsSigned() { - return true; - } - DUCKDB_API static idx_t Digits() { - return 250; + +} // namespace duckdb + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/statistics/base_statistics.hpp +// +// +//===----------------------------------------------------------------------===// + + + + + +//===----------------------------------------------------------------------===// +// DuckDB +// +// 
duckdb/common/operator/comparison_operators.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+
+//===----------------------------------------------------------------------===//
+// DuckDB
+//
+// duckdb/common/types/hugeint.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+
+namespace duckdb {
+
+//! Returns the PhysicalType for the given type
+template <class T>
+PhysicalType GetTypeId() {
+	if (std::is_same<T, bool>()) {
+		return PhysicalType::BOOL;
+	} else if (std::is_same<T, int8_t>()) {
+		return PhysicalType::INT8;
+	} else if (std::is_same<T, int16_t>()) {
+		return PhysicalType::INT16;
+	} else if (std::is_same<T, int32_t>()) {
+		return PhysicalType::INT32;
+	} else if (std::is_same<T, int64_t>()) {
+		return PhysicalType::INT64;
+	} else if (std::is_same<T, uint8_t>()) {
+		return PhysicalType::UINT8;
+	} else if (std::is_same<T, uint16_t>()) {
+		return PhysicalType::UINT16;
+	} else if (std::is_same<T, uint32_t>()) {
+		return PhysicalType::UINT32;
+	} else if (std::is_same<T, uint64_t>()) {
+		return PhysicalType::UINT64;
+	} else if (std::is_same<T, hugeint_t>()) {
+		return PhysicalType::INT128;
+	} else if (std::is_same<T, date_t>()) {
+		return PhysicalType::INT32;
+	} else if (std::is_same<T, dtime_t>()) {
+		return PhysicalType::INT64;
+	} else if (std::is_same<T, timestamp_t>()) {
+		return PhysicalType::INT64;
+	} else if (std::is_same<T, float>()) {
+		return PhysicalType::FLOAT;
+	} else if (std::is_same<T, double>()) {
+		return PhysicalType::DOUBLE;
+	} else if (std::is_same<T, const char *>() || std::is_same<T, char *>() || std::is_same<T, string_t>()) {
+		return PhysicalType::VARCHAR;
+	} else if (std::is_same<T, interval_t>()) {
+		return PhysicalType::INTERVAL;
+	} else {
+		return PhysicalType::INVALID;
+	}
+}
+
+template <class T>
+bool TypeIsNumber() {
+	return std::is_integral<T>() || std::is_floating_point<T>() || std::is_same<T, hugeint_t>();
+}
+
+template <class T>
+bool IsValidType() {
+	return GetTypeId<T>() != PhysicalType::INVALID;
+}
+
+template <class T>
+bool IsIntegerType() {
+	return TypeIsIntegral(GetTypeId<T>());
+}
 
 } // namespace duckdb
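// --------------------------------------------------------------------------
// Editorial sketch (not part of the upstream patch): GetTypeId() above maps a
// C++ type to its storage PhysicalType at compile time. A couple of spot
// checks, under a hypothetical guard macro so they are never compiled:
#ifdef DUCKDB_RS_DOC_EXAMPLE
static void GetTypeIdSketch() {
	using namespace duckdb;
	D_ASSERT(GetTypeId<date_t>() == PhysicalType::INT32);      // days since epoch
	D_ASSERT(GetTypeId<timestamp_t>() == PhysicalType::INT64); // micros since epoch
	D_ASSERT(!IsValidType<void *>()); // unmapped types fall through to INVALID
}
#endif
// --------------------------------------------------------------------------
 
 
 namespace duckdb {
 
 //! The Hugeint class contains static operations for the INT128 type
@@ -7809,112 +8266,20 @@
 template <>
 bool Hugeint::TryConvert(uint32_t value, hugeint_t &result);
 template <>
 bool Hugeint::TryConvert(uint64_t value, hugeint_t &result);
-template <>
-bool Hugeint::TryConvert(float value, hugeint_t &result);
-template <>
-bool Hugeint::TryConvert(double value, hugeint_t &result);
-template <>
-bool Hugeint::TryConvert(long double value, hugeint_t &result);
-
-} // namespace duckdb
-
-//===----------------------------------------------------------------------===//
-// DuckDB
-//
-// duckdb/common/types/interval.hpp
-//
-//
-//===----------------------------------------------------------------------===//
-
-
-
-
-
-namespace duckdb {
-
-//! The Interval class is a static class that holds helper functions for the Interval
-//! type.
-class Interval {
-public:
-	static constexpr const int32_t MONTHS_PER_MILLENIUM = 12000;
-	static constexpr const int32_t MONTHS_PER_CENTURY = 1200;
-	static constexpr const int32_t MONTHS_PER_DECADE = 120;
-	static constexpr const int32_t MONTHS_PER_YEAR = 12;
-	static constexpr const int32_t MONTHS_PER_QUARTER = 3;
-	static constexpr const int32_t DAYS_PER_WEEK = 7;
-	//! 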
only used for interval comparison/ordering purposes, in which case a month counts as 30 days - static constexpr const int64_t DAYS_PER_MONTH = 30; - static constexpr const int64_t DAYS_PER_YEAR = 365; - static constexpr const int64_t MSECS_PER_SEC = 1000; - static constexpr const int32_t SECS_PER_MINUTE = 60; - static constexpr const int32_t MINS_PER_HOUR = 60; - static constexpr const int32_t HOURS_PER_DAY = 24; - static constexpr const int32_t SECS_PER_HOUR = SECS_PER_MINUTE * MINS_PER_HOUR; - static constexpr const int32_t SECS_PER_DAY = SECS_PER_HOUR * HOURS_PER_DAY; - static constexpr const int32_t SECS_PER_WEEK = SECS_PER_DAY * DAYS_PER_WEEK; - - static constexpr const int64_t MICROS_PER_MSEC = 1000; - static constexpr const int64_t MICROS_PER_SEC = MICROS_PER_MSEC * MSECS_PER_SEC; - static constexpr const int64_t MICROS_PER_MINUTE = MICROS_PER_SEC * SECS_PER_MINUTE; - static constexpr const int64_t MICROS_PER_HOUR = MICROS_PER_MINUTE * MINS_PER_HOUR; - static constexpr const int64_t MICROS_PER_DAY = MICROS_PER_HOUR * HOURS_PER_DAY; - static constexpr const int64_t MICROS_PER_WEEK = MICROS_PER_DAY * DAYS_PER_WEEK; - static constexpr const int64_t MICROS_PER_MONTH = MICROS_PER_DAY * DAYS_PER_MONTH; - - static constexpr const int64_t NANOS_PER_MICRO = 1000; - static constexpr const int64_t NANOS_PER_MSEC = NANOS_PER_MICRO * MICROS_PER_MSEC; - static constexpr const int64_t NANOS_PER_SEC = NANOS_PER_MSEC * MSECS_PER_SEC; - static constexpr const int64_t NANOS_PER_MINUTE = NANOS_PER_SEC * SECS_PER_MINUTE; - static constexpr const int64_t NANOS_PER_HOUR = NANOS_PER_MINUTE * MINS_PER_HOUR; - static constexpr const int64_t NANOS_PER_DAY = NANOS_PER_HOUR * HOURS_PER_DAY; - static constexpr const int64_t NANOS_PER_WEEK = NANOS_PER_DAY * DAYS_PER_WEEK; - -public: - //! Convert a string to an interval object - static bool FromString(const string &str, interval_t &result); - //! Convert a string to an interval object - static bool FromCString(const char *str, idx_t len, interval_t &result, string *error_message, bool strict); - //! Convert an interval object to a string - static string ToString(const interval_t &val); - - //! Convert milliseconds to a normalised interval - DUCKDB_API static interval_t FromMicro(int64_t micros); - - //! Get Interval in milliseconds - static int64_t GetMilli(const interval_t &val); - - //! Get Interval in microseconds - static int64_t GetMicro(const interval_t &val); - - //! Get Interval in Nanoseconds - static int64_t GetNanoseconds(const interval_t &val); - - //! Returns the age between two timestamps (including 30 day months) - static interval_t GetAge(timestamp_t timestamp_1, timestamp_t timestamp_2); - - //! Returns the exact difference between two timestamps (days and seconds) - static interval_t GetDifference(timestamp_t timestamp_1, timestamp_t timestamp_2); - - //! Returns the inverted interval - static interval_t Invert(interval_t interval); - - //! Add an interval to a date - static date_t Add(date_t left, interval_t right); - //! Add an interval to a timestamp - static timestamp_t Add(timestamp_t left, interval_t right); - //! Add an interval to a time. In case the time overflows or underflows, modify the date by the overflow. - //! 
For example if we go from 23:00 to 02:00, we add a day to the date - static dtime_t Add(dtime_t left, interval_t right, date_t &date); +template <> +bool Hugeint::TryConvert(float value, hugeint_t &result); +template <> +bool Hugeint::TryConvert(double value, hugeint_t &result); +template <> +bool Hugeint::TryConvert(long double value, hugeint_t &result); +template <> +bool Hugeint::TryConvert(const char *value, hugeint_t &result); - //! Comparison operators - static bool Equals(interval_t left, interval_t right); - static bool GreaterThan(interval_t left, interval_t right); - static bool GreaterThanEquals(interval_t left, interval_t right); -}; } // namespace duckdb + #include namespace duckdb { @@ -8806,6 +9171,7 @@ class CatalogEntry { public: virtual unique_ptr AlterEntry(ClientContext &context, AlterInfo *info); + virtual void UndoAlter(ClientContext &context, AlterInfo *info); virtual unique_ptr Copy(ClientContext &context); @@ -10797,6 +11163,8 @@ class TableCatalogEntry : public StandardEntry { public: bool HasGeneratedColumns() const; unique_ptr AlterEntry(ClientContext &context, AlterInfo *info) override; + void UndoAlter(ClientContext &context, AlterInfo *info) override; + //! Returns whether or not a column with the given name exists DUCKDB_API bool ColumnExists(const string &name); //! Returns a reference to the column of the specified name. Throws an @@ -10975,6 +11343,9 @@ class LogicalOperator { return true; } + //! Returns the set of table indexes of this operator + virtual vector GetTableIndex() const; + protected: //! Resolve types for this specific operator virtual void ResolveTypes() = 0; @@ -13825,6 +14196,9 @@ class Pipeline : public std::enable_shared_from_this { void Reset(); void ResetSink(); void ResetSource(bool force); + void ClearSource() { + source_state.reset(); + } void Schedule(shared_ptr &event); //! Finalize this pipeline @@ -15162,27 +15536,46 @@ namespace duckdb { struct AlterInfo; class ClientContext; +struct MappingValue; +struct EntryIndex; typedef unordered_map> set_lock_map_t; -struct MappingValue { - explicit MappingValue(idx_t index_) : index(index_), timestamp(0), deleted(false), parent(nullptr) { +struct EntryValue { + EntryValue() { + throw InternalException("EntryValue called without a catalog entry"); } - idx_t index; - transaction_t timestamp; - bool deleted; - unique_ptr child; - MappingValue *parent; + explicit EntryValue(unique_ptr entry_p) : entry(move(entry_p)), reference_count(0) { + } + //! enable move constructors + EntryValue(EntryValue &&other) noexcept { + Swap(other); + } + EntryValue &operator=(EntryValue &&other) noexcept { + Swap(other); + return *this; + } + void Swap(EntryValue &other) { + std::swap(entry, other.entry); + idx_t count = reference_count; + reference_count = other.reference_count.load(); + other.reference_count = count; + } + + unique_ptr entry; + atomic reference_count; }; //! The Catalog Set stores (key, value) map of a set of CatalogEntries class CatalogSet { friend class DependencyManager; friend class EntryDropper; + friend struct EntryIndex; public: DUCKDB_API explicit CatalogSet(Catalog &catalog, unique_ptr defaults = nullptr); + ~CatalogSet(); //! Create an entry in the catalog set. Returns whether or not it was //! successful. 
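// --------------------------------------------------------------------------
// Editorial sketch (not part of the upstream patch): EntryValue above defines
// Swap() by hand because std::atomic is neither copyable nor movable, so a
// defaulted move constructor would not compile; the counter is transferred
// via load()/store instead. The same pattern in standalone form, with all
// names hypothetical and guarded so it is never compiled:
#ifdef DUCKDB_RS_DOC_EXAMPLE
#include <atomic>
#include <memory>
#include <utility>
struct RefCountedSlot {
	std::unique_ptr<int> payload;
	std::atomic<size_t> refs {0};

	RefCountedSlot() = default;
	RefCountedSlot(RefCountedSlot &&other) noexcept {
		Swap(other);
	}
	void Swap(RefCountedSlot &other) {
		std::swap(payload, other.payload);
		size_t count = refs;      // atomics cannot be std::swap'ed directly...
		refs = other.refs.load(); // ...so exchange the counts via load/store
		other.refs = count;
	}
};
#endif
// --------------------------------------------------------------------------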
@@ -15223,7 +15616,6 @@ class CatalogSet { DUCKDB_API static bool HasConflict(ClientContext &context, transaction_t timestamp); DUCKDB_API static bool UseTimestamp(ClientContext &context, transaction_t timestamp); - CatalogEntry *GetEntryFromIndex(idx_t index); void UpdateTimestamp(CatalogEntry *entry, transaction_t timestamp); private: @@ -15236,29 +15628,32 @@ class CatalogSet { //! Given a root entry, gets the entry valid for this transaction CatalogEntry *GetEntryForTransaction(ClientContext &context, CatalogEntry *current); CatalogEntry *GetCommittedEntry(CatalogEntry *current); - bool GetEntryInternal(ClientContext &context, const string &name, idx_t &entry_index, CatalogEntry *&entry); - bool GetEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry *&entry); + bool GetEntryInternal(ClientContext &context, const string &name, EntryIndex *entry_index, CatalogEntry *&entry); + bool GetEntryInternal(ClientContext &context, EntryIndex &entry_index, CatalogEntry *&entry); //! Drops an entry from the catalog set; must hold the catalog_lock to safely call this - void DropEntryInternal(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade); + void DropEntryInternal(ClientContext &context, EntryIndex entry_index, CatalogEntry &entry, bool cascade); CatalogEntry *CreateEntryInternal(ClientContext &context, unique_ptr entry); MappingValue *GetMapping(ClientContext &context, const string &name, bool get_latest = false); - void PutMapping(ClientContext &context, const string &name, idx_t entry_index); + void PutMapping(ClientContext &context, const string &name, EntryIndex entry_index); void DeleteMapping(ClientContext &context, const string &name); - void DropEntryDependencies(ClientContext &context, idx_t entry_index, CatalogEntry &entry, bool cascade); + void DropEntryDependencies(ClientContext &context, EntryIndex &entry_index, CatalogEntry &entry, bool cascade); //! Create all default entries void CreateDefaultEntries(ClientContext &context, unique_lock &lock); //! Attempt to create a default entry with the specified name. Returns the entry if successful, nullptr otherwise. CatalogEntry *CreateDefaultEntry(ClientContext &context, const string &name, unique_lock &lock); + EntryIndex PutEntry(idx_t entry_index, unique_ptr entry); + void PutEntry(EntryIndex index, unique_ptr entry); + private: Catalog &catalog; //! The catalog lock is used to make changes to the data mutex catalog_lock; + //! The set of catalog entries + unordered_map entries; //! Mapping of string to catalog entry case_insensitive_map_t> mapping; - //! The set of catalog entries - unordered_map> entries; //! The current catalog entry index idx_t current_entry = 0; //! The generator used to generate default internal entries @@ -17398,6 +17793,7 @@ class ColumnRefExpression : public ParsedExpression { namespace duckdb { struct string_t; +struct interval_t; // efficient hash function that maximizes the avalanche effect and minimizes // bias @@ -18545,6 +18941,9 @@ struct OperatorExtensionInfo { typedef BoundStatement (*bind_function_t)(ClientContext &context, Binder &binder, OperatorExtensionInfo *info, SQLStatement &statement); +// forward declaration to avoid circular reference +struct LogicalExtensionOperator; + class OperatorExtension { public: bind_function_t Bind; @@ -18552,6 +18951,10 @@ class OperatorExtension { //! 
Additional info passed to the CreatePlan & Bind functions shared_ptr operator_info; + virtual std::string GetName() = 0; + virtual std::unique_ptr Deserialize(LogicalDeserializationState &state, + FieldReader &reader) = 0; + DUCKDB_API virtual ~OperatorExtension() { } }; @@ -18697,7 +19100,7 @@ struct DBConfig { //! A reference to the (shared) default allocator (Allocator::DefaultAllocator) shared_ptr default_allocator; //! Extensions made to binder - vector operator_extensions; + vector> operator_extensions; public: DUCKDB_API static DBConfig &GetConfig(ClientContext &context); @@ -19439,6 +19842,8 @@ DUCKDB_API const char *duckdb_result_error(duckdb_result *result); /*! Fetches a data chunk from the duckdb_result. This function should be called repeatedly until the result is exhausted. +The result must be destroyed with `duckdb_destroy_data_chunk`. + This function supersedes all `duckdb_value` functions, as well as the `duckdb_column_data` and `duckdb_nullmask_data` functions. It results in significantly better performance, and should be preferred in newer code-bases. @@ -21095,145 +21500,6 @@ DUCKDB_API void duckdb_destroy_task_state(duckdb_task_state state); //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/common/types/date.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - - -namespace duckdb { - -//! The Date class is a static class that holds helper functions for the Date type. -class Date { -public: - static const char *PINF; // NOLINT - static const char *NINF; // NOLINT - static const char *EPOCH; // NOLINT - - static const string_t MONTH_NAMES[12]; - static const string_t MONTH_NAMES_ABBREVIATED[12]; - static const string_t DAY_NAMES[7]; - static const string_t DAY_NAMES_ABBREVIATED[7]; - static const int32_t NORMAL_DAYS[13]; - static const int32_t CUMULATIVE_DAYS[13]; - static const int32_t LEAP_DAYS[13]; - static const int32_t CUMULATIVE_LEAP_DAYS[13]; - static const int32_t CUMULATIVE_YEAR_DAYS[401]; - static const int8_t MONTH_PER_DAY_OF_YEAR[365]; - static const int8_t LEAP_MONTH_PER_DAY_OF_YEAR[366]; - - // min date is 5877642-06-25 (BC) (-2^31+2) - constexpr static const int32_t DATE_MIN_YEAR = -5877641; - constexpr static const int32_t DATE_MIN_MONTH = 6; - constexpr static const int32_t DATE_MIN_DAY = 25; - // max date is 5881580-07-10 (2^31-2) - constexpr static const int32_t DATE_MAX_YEAR = 5881580; - constexpr static const int32_t DATE_MAX_MONTH = 7; - constexpr static const int32_t DATE_MAX_DAY = 10; - constexpr static const int32_t EPOCH_YEAR = 1970; - - constexpr static const int32_t YEAR_INTERVAL = 400; - constexpr static const int32_t DAYS_PER_YEAR_INTERVAL = 146097; - -public: - //! Convert a string in the format "YYYY-MM-DD" to a date object - DUCKDB_API static date_t FromString(const string &str, bool strict = false); - //! Convert a string in the format "YYYY-MM-DD" to a date object - DUCKDB_API static date_t FromCString(const char *str, idx_t len, bool strict = false); - //! Convert a date object to a string in the format "YYYY-MM-DD" - DUCKDB_API static string ToString(date_t date); - //! Try to convert text in a buffer to a date; returns true if parsing was successful - //! If the date was a "special" value, the special flag will be set. - DUCKDB_API static bool TryConvertDate(const char *buf, idx_t len, idx_t &pos, date_t &result, bool &special, - bool strict = false); - - //! 
Create a string "YYYY-MM-DD" from a specified (year, month, day) - //! combination - DUCKDB_API static string Format(int32_t year, int32_t month, int32_t day); - - //! Extract the year, month and day from a given date object - DUCKDB_API static void Convert(date_t date, int32_t &out_year, int32_t &out_month, int32_t &out_day); - //! Create a Date object from a specified (year, month, day) combination - DUCKDB_API static date_t FromDate(int32_t year, int32_t month, int32_t day); - DUCKDB_API static bool TryFromDate(int32_t year, int32_t month, int32_t day, date_t &result); - - //! Returns true if (year) is a leap year, and false otherwise - DUCKDB_API static bool IsLeapYear(int32_t year); - - //! Returns true if the specified (year, month, day) combination is a valid - //! date - DUCKDB_API static bool IsValid(int32_t year, int32_t month, int32_t day); - - //! Returns true if the specified date is finite - static inline bool IsFinite(date_t date) { - return date != date_t::infinity() && date != date_t::ninfinity(); - } - - //! The max number of days in a month of a given year - DUCKDB_API static int32_t MonthDays(int32_t year, int32_t month); - - //! Extract the epoch from the date (seconds since 1970-01-01) - DUCKDB_API static int64_t Epoch(date_t date); - //! Extract the epoch from the date (nanoseconds since 1970-01-01) - DUCKDB_API static int64_t EpochNanoseconds(date_t date); - //! Extract the epoch from the date (microseconds since 1970-01-01) - DUCKDB_API static int64_t EpochMicroseconds(date_t date); - //! Convert the epoch (seconds since 1970-01-01) to a date_t - DUCKDB_API static date_t EpochToDate(int64_t epoch); - - //! Extract the number of days since epoch (days since 1970-01-01) - DUCKDB_API static int32_t EpochDays(date_t date); - //! Convert the epoch number of days to a date_t - DUCKDB_API static date_t EpochDaysToDate(int32_t epoch); - - //! Extract year of a date entry - DUCKDB_API static int32_t ExtractYear(date_t date); - //! Extract year of a date entry, but optimized to first try the last year found - DUCKDB_API static int32_t ExtractYear(date_t date, int32_t *last_year); - DUCKDB_API static int32_t ExtractYear(timestamp_t ts, int32_t *last_year); - //! Extract month of a date entry - DUCKDB_API static int32_t ExtractMonth(date_t date); - //! Extract day of a date entry - DUCKDB_API static int32_t ExtractDay(date_t date); - //! Extract the day of the week (1-7) - DUCKDB_API static int32_t ExtractISODayOfTheWeek(date_t date); - //! Extract the day of the year - DUCKDB_API static int32_t ExtractDayOfTheYear(date_t date); - //! Extract the ISO week number - //! ISO weeks start on Monday and the first week of a year - //! contains January 4 of that year. - //! In the ISO week-numbering system, it is possible for early-January dates - //! to be part of the 52nd or 53rd week of the previous year. - DUCKDB_API static void ExtractISOYearWeek(date_t date, int32_t &year, int32_t &week); - DUCKDB_API static int32_t ExtractISOWeekNumber(date_t date); - DUCKDB_API static int32_t ExtractISOYearNumber(date_t date); - //! Extract the week number as Python handles it. - //! Either Monday or Sunday is the first day of the week, - //! and any date before the first Monday/Sunday returns week 0 - //! This is a bit more consistent because week numbers in a year are always incrementing - DUCKDB_API static int32_t ExtractWeekNumberRegular(date_t date, bool monday_first = true); - //! Returns the date of the monday of the current week. 
- DUCKDB_API static date_t GetMondayOfCurrentWeek(date_t date); - - //! Helper function to parse two digits from a string (e.g. "30" -> 30, "03" -> 3, "3" -> 3) - DUCKDB_API static bool ParseDoubleDigit(const char *buf, idx_t len, idx_t &pos, int32_t &result); - - DUCKDB_API static string ConversionError(const string &str); - DUCKDB_API static string ConversionError(string_t str); - -private: - static void ExtractYearOffset(int32_t &n, int32_t &year, int32_t &year_offset); -}; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// // duckdb/common/arrow/arrow_converter.hpp // // @@ -21413,86 +21679,6 @@ class UUID { } }; -} // namespace duckdb -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/common/types/timestamp.hpp -// -// -//===----------------------------------------------------------------------===// - - - - - - - -namespace duckdb { - -//! The Timestamp class is a static class that holds helper functions for the Timestamp -//! type. -class Timestamp { -public: - // min timestamp is 290308-12-22 (BC) - constexpr static const int32_t MIN_YEAR = -290308; - constexpr static const int32_t MIN_MONTH = 12; - constexpr static const int32_t MIN_DAY = 22; - -public: - //! Convert a string in the format "YYYY-MM-DD hh:mm:ss[.f][-+TH[:tm]]" to a timestamp object - DUCKDB_API static timestamp_t FromString(const string &str); - //! Convert a string where the offset can also be a time zone string: / [A_Za-z0-9/_]+/ - //! If has_offset is true, then the result is an instant that was offset from UTC - //! If the tz is not empty, the result is still an instant, but the parts can be extracted and applied to the TZ - DUCKDB_API static bool TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, bool &has_offset, - string_t &tz); - DUCKDB_API static bool TryConvertTimestamp(const char *str, idx_t len, timestamp_t &result); - DUCKDB_API static timestamp_t FromCString(const char *str, idx_t len); - //! Convert a date object to a string in the format "YYYY-MM-DD hh:mm:ss" - DUCKDB_API static string ToString(timestamp_t timestamp); - - DUCKDB_API static date_t GetDate(timestamp_t timestamp); - - DUCKDB_API static dtime_t GetTime(timestamp_t timestamp); - //! Create a Timestamp object from a specified (date, time) combination - DUCKDB_API static timestamp_t FromDatetime(date_t date, dtime_t time); - DUCKDB_API static bool TryFromDatetime(date_t date, dtime_t time, timestamp_t &result); - - //! Is the timestamp finite or infinite? - static inline bool IsFinite(timestamp_t timestamp) { - return timestamp != timestamp_t::infinity() && timestamp != timestamp_t::ninfinity(); - } - - //! Extract the date and time from a given timestamp object - DUCKDB_API static void Convert(timestamp_t date, date_t &out_date, dtime_t &out_time); - //! Returns current timestamp - DUCKDB_API static timestamp_t GetCurrentTimestamp(); - - //! Convert the epoch (in sec) to a timestamp - DUCKDB_API static timestamp_t FromEpochSeconds(int64_t ms); - //! Convert the epoch (in ms) to a timestamp - DUCKDB_API static timestamp_t FromEpochMs(int64_t ms); - //! Convert the epoch (in microseconds) to a timestamp - DUCKDB_API static timestamp_t FromEpochMicroSeconds(int64_t micros); - //! Convert the epoch (in nanoseconds) to a timestamp - DUCKDB_API static timestamp_t FromEpochNanoSeconds(int64_t micros); - - //! 
Convert the epoch (in seconds) to a timestamp - DUCKDB_API static int64_t GetEpochSeconds(timestamp_t timestamp); - //! Convert the epoch (in ms) to a timestamp - DUCKDB_API static int64_t GetEpochMs(timestamp_t timestamp); - //! Convert a timestamp to epoch (in microseconds) - DUCKDB_API static int64_t GetEpochMicroSeconds(timestamp_t timestamp); - //! Convert a timestamp to epoch (in nanoseconds) - DUCKDB_API static int64_t GetEpochNanoSeconds(timestamp_t timestamp); - - DUCKDB_API static bool TryParseUTCOffset(const char *str, idx_t &pos, idx_t len, int &hour_offset, - int &minute_offset); - - DUCKDB_API static string ConversionError(const string &str); - DUCKDB_API static string ConversionError(string_t str); -}; } // namespace duckdb //===----------------------------------------------------------------------===// // DuckDB @@ -21510,6 +21696,8 @@ class Timestamp { namespace duckdb { +struct dtime_t; + //! The Time class is a static class that holds helper functions for the Time //! type. class Time { @@ -21560,6 +21748,11 @@ class DuckDB; class TableCatalogEntry; class Connection; +enum class AppenderType : uint8_t { + LOGICAL, // Cast input -> LogicalType + PHYSICAL // Cast input -> PhysicalType +}; + //! The Appender class can be used to append elements to a table. class BaseAppender { protected: @@ -21575,10 +21768,14 @@ class BaseAppender { DataChunk chunk; //! The current column to append to idx_t column = 0; + //! The type of the appender + AppenderType appender_type; + +protected: + DUCKDB_API BaseAppender(Allocator &allocator, AppenderType type); + DUCKDB_API BaseAppender(Allocator &allocator, vector types, AppenderType type); public: - DUCKDB_API BaseAppender(Allocator &allocator); - DUCKDB_API BaseAppender(Allocator &allocator, vector types); DUCKDB_API virtual ~BaseAppender(); //! 
Begins a new row append, after calling this the other AppendX() functions @@ -21626,6 +21823,8 @@ class BaseAppender { void AppendValueInternal(T value); template void AppendValueInternal(Vector &vector, SRC input); + template + void AppendDecimalValueInternal(Vector &vector, SRC input); void AppendRowRecursive() { EndRow(); diff --git a/libduckdb-sys/upgrade.sh b/libduckdb-sys/upgrade.sh index b06b8838..75b81de8 100755 --- a/libduckdb-sys/upgrade.sh +++ b/libduckdb-sys/upgrade.sh @@ -10,7 +10,7 @@ export DUCKDB_LIB_DIR="$SCRIPT_DIR/duckdb" export DU_INCLUDE_DIR="$DUCKDB_LIB_DIR" # Download and extract amalgamation -DUCKDB_VERSION=v0.6.0 +DUCKDB_VERSION=v0.6.1 wget -T 20 "https://github.com/duckdb/duckdb/releases/download/$DUCKDB_VERSION/libduckdb-src.zip" unzip -o libduckdb-src.zip -d duckdb rm -f libduckdb-src.zip diff --git a/src/types/from_sql.rs b/src/types/from_sql.rs index d22f8ba7..b88cdb87 100644 --- a/src/types/from_sql.rs +++ b/src/types/from_sql.rs @@ -279,14 +279,14 @@ mod test { fn test_timestamp_raw() -> Result<()> { let db = Connection::open_in_memory()?; let sql = "BEGIN; - CREATE TABLE timestamp (sec TIMESTAMP_S, milli TIMESTAMP_MS, micro TIMESTAMP_US, nano TIMESTAMP_NS ); - INSERT INTO timestamp VALUES (NULL,NULL,NULL,NULL ); - INSERT INTO timestamp VALUES ('2008-01-01 00:00:01','2008-01-01 00:00:01.594','2008-01-01 00:00:01.88926','2008-01-01 00:00:01.889268321' ); - INSERT INTO timestamp VALUES (NULL,NULL,NULL,1199145601889268321 ); + CREATE TABLE ts (sec TIMESTAMP_S, milli TIMESTAMP_MS, micro TIMESTAMP_US, nano TIMESTAMP_NS ); + INSERT INTO ts VALUES (NULL,NULL,NULL,NULL ); + INSERT INTO ts VALUES ('2008-01-01 00:00:01','2008-01-01 00:00:01.594','2008-01-01 00:00:01.88926','2008-01-01 00:00:01.889268321' ); + -- INSERT INTO ts VALUES (NULL,NULL,NULL,1199145601889268321 ); END;"; db.execute_batch(sql)?; let v = db.query_row( - "SELECT sec, milli, micro, nano FROM timestamp WHERE sec is not null", + "SELECT sec, milli, micro, nano FROM ts WHERE sec is not null", [], |row| <(i64, i64, i64, i64)>::try_from(row), )?;
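// --------------------------------------------------------------------------
// Editorial footnotes (not part of the patch), both guarded by a hypothetical
// macro so they are never compiled.
//
// 1) The duckdb_result_get_chunk() documentation above says chunks should be
//    fetched until the result is exhausted and that each chunk must be
//    destroyed with duckdb_destroy_data_chunk(); a minimal sketch of that
//    contract:
#ifdef DUCKDB_RS_DOC_EXAMPLE
#include "duckdb.h"
static void FetchAllChunks(duckdb_result *result) {
	idx_t count = duckdb_result_chunk_count(*result);
	for (idx_t i = 0; i < count; i++) {
		duckdb_data_chunk chunk = duckdb_result_get_chunk(*result, i);
		// ... read vectors out of `chunk` here ...
		duckdb_destroy_data_chunk(&chunk); // required for every fetched chunk
	}
}

// 2) The AppenderType enum added above selects whether BaseAppender casts each
//    incoming value to the column's LogicalType or its PhysicalType; through
//    the public C++ API the row-append flow the header describes looks like
//    this ("people" is a hypothetical table):
#include "duckdb.hpp"
static void AppendRowSketch(duckdb::Connection &con) {
	duckdb::Appender appender(con, "people");
	appender.AppendRow(42, "alice"); // BeginRow() + Append(...) + EndRow()
	appender.Close();                // flush pending rows to the table
}
#endif
// --------------------------------------------------------------------------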