Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(query): improve hash join #12928

Merged
merged 37 commits into the base branch from the contributor's branch
Oct 8, 2023
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
c5b01f3
improve hash join
Dousir9 Sep 19, 2023
d91102c
improve concat
Dousir9 Sep 19, 2023
81dd24b
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 19, 2023
43f6395
improve take_string and add take_boolean
Dousir9 Sep 20, 2023
0a28726
fix
Dousir9 Sep 20, 2023
1cf2e82
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 20, 2023
332b435
improve concat
Dousir9 Sep 21, 2023
f1c83ad
improve concat_string_types
Dousir9 Sep 21, 2023
83c6586
improve take
Dousir9 Sep 21, 2023
491df68
improve filter
Dousir9 Sep 22, 2023
e385a09
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 22, 2023
83c8cd9
update
Dousir9 Sep 22, 2023
b035116
remove get_function_context
Dousir9 Sep 22, 2023
82dd7ac
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 22, 2023
0e8e35a
improve settings
Dousir9 Sep 22, 2023
a4ccedd
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 23, 2023
9790039
allow too_many_arguments
Dousir9 Sep 23, 2023
0101b02
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 25, 2023
f992e23
merge
Dousir9 Sep 25, 2023
e4c98f5
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 26, 2023
4cbbc8a
merge
Dousir9 Sep 26, 2023
9ca0292
refine primitive comments
Dousir9 Sep 26, 2023
de681b2
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 27, 2023
a0bdf31
refine
Dousir9 Sep 27, 2023
c568805
refine
Dousir9 Sep 27, 2023
1bcfbf8
refine take_compact
Dousir9 Sep 27, 2023
c07f88f
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 27, 2023
62176e6
fix take_compact
Dousir9 Sep 27, 2023
6a15217
add safety comment
Dousir9 Sep 27, 2023
bd0d64e
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Sep 27, 2023
50a17b7
fix take_compact_string
Dousir9 Sep 27, 2023
5884ad9
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Oct 1, 2023
ed5bdc7
refine: use extend from iter and get_unchecked_mut
Dousir9 Oct 2, 2023
ec6f001
Merge branch 'main' of github.com:datafuselabs/databend into improve_…
Dousir9 Oct 8, 2023
82fb00a
refine concat_primitive_types
Dousir9 Oct 8, 2023
93033cc
reduce pr size
Dousir9 Oct 8, 2023
26b8221
reduce pr size
Dousir9 Oct 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 10 additions & 18 deletions src/common/hashtable/src/hashjoin_hashtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ where

fn contains(&self, key_ref: &Self::Key) -> bool {
let index = key_ref.hash() as usize & self.hash_mask;
let mut raw_entry_ptr = self.pointers[index];
Dousir9 marked this conversation as resolved.
Show resolved Hide resolved
// # Safety
// `index` = hash & mask, it is less than the capacity of hash table.
let mut raw_entry_ptr = unsafe { *self.pointers.get_unchecked(index) };
loop {
if raw_entry_ptr == 0 {
break;
Expand All @@ -132,22 +134,18 @@ where
) -> (usize, u64) {
let index = key_ref.hash() as usize & self.hash_mask;
let origin = occupied;
let mut raw_entry_ptr = self.pointers[index];
// # Safety
// `index` = hash & mask, it is less than the capacity of hash table.
let mut raw_entry_ptr = unsafe { *self.pointers.get_unchecked(index) };
loop {
if raw_entry_ptr == 0 || occupied >= capacity {
break;
}
let raw_entry = unsafe { &*(raw_entry_ptr as *mut RawEntry<K>) };
let raw_entry = unsafe { &*(raw_entry_ptr as *const RawEntry<K>) };
if key_ref == &raw_entry.key {
// # Safety
// occupied is less than the capacity of vec_ptr.
unsafe {
std::ptr::copy_nonoverlapping(
&raw_entry.row_ptr as *const RowPtr,
vec_ptr.add(occupied),
1,
)
};
unsafe { std::ptr::write(vec_ptr.add(occupied), raw_entry.row_ptr) };
occupied += 1;
}
raw_entry_ptr = raw_entry.next;
Expand All @@ -172,17 +170,11 @@ where
if incomplete_ptr == 0 || occupied >= capacity {
break;
}
let raw_entry = unsafe { &*(incomplete_ptr as *mut RawEntry<K>) };
let raw_entry = unsafe { &*(incomplete_ptr as *const RawEntry<K>) };
if key_ref == &raw_entry.key {
// # Safety
// occupied is less than the capacity of vec_ptr.
unsafe {
std::ptr::copy_nonoverlapping(
&raw_entry.row_ptr as *const RowPtr,
vec_ptr.add(occupied),
1,
)
};
unsafe { std::ptr::write(vec_ptr.add(occupied), raw_entry.row_ptr) };
occupied += 1;
}
incomplete_ptr = raw_entry.next;
Expand Down
28 changes: 10 additions & 18 deletions src/common/hashtable/src/hashjoin_string_hashtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ where A: Allocator + Clone + 'static

fn contains(&self, key_ref: &Self::Key) -> bool {
let index = key_ref.fast_hash() as usize & self.hash_mask;
let mut raw_entry_ptr = self.pointers[index];
// # Safety
// `index` = hash & mask, it is less than the capacity of hash table.
let mut raw_entry_ptr = unsafe { *self.pointers.get_unchecked(index) };
loop {
if raw_entry_ptr == 0 {
break;
Expand Down Expand Up @@ -125,12 +127,14 @@ where A: Allocator + Clone + 'static
) -> (usize, u64) {
let index = key_ref.fast_hash() as usize & self.hash_mask;
let origin = occupied;
let mut raw_entry_ptr = self.pointers[index];
// # Safety
// `index` = hash & mask, it is less than the capacity of hash table.
let mut raw_entry_ptr = unsafe { *self.pointers.get_unchecked(index) };
loop {
if raw_entry_ptr == 0 || occupied >= capacity {
break;
}
let raw_entry = unsafe { &*(raw_entry_ptr as *mut StringRawEntry) };
let raw_entry = unsafe { &*(raw_entry_ptr as *const StringRawEntry) };
// Compare `early` and the length of the string, the size of `early` is 4.
let min_len = std::cmp::min(STRING_EARLY_SIZE, key_ref.len());
if raw_entry.length as usize == key_ref.len()
Expand All @@ -145,13 +149,7 @@ where A: Allocator + Clone + 'static
if key == key_ref {
// # Safety
// occupied is less than the capacity of vec_ptr.
unsafe {
std::ptr::copy_nonoverlapping(
&raw_entry.row_ptr as *const RowPtr,
vec_ptr.add(occupied),
1,
)
};
unsafe { std::ptr::write(vec_ptr.add(occupied), raw_entry.row_ptr) };
occupied += 1;
}
}
Expand All @@ -177,7 +175,7 @@ where A: Allocator + Clone + 'static
if incomplete_ptr == 0 || occupied >= capacity {
break;
}
let raw_entry = unsafe { &*(incomplete_ptr as *mut StringRawEntry) };
let raw_entry = unsafe { &*(incomplete_ptr as *const StringRawEntry) };
// Compare `early` and the length of the string, the size of `early` is 4.
let min_len = std::cmp::min(STRING_EARLY_SIZE, key_ref.len());
if raw_entry.length as usize == key_ref.len()
Expand All @@ -192,13 +190,7 @@ where A: Allocator + Clone + 'static
if key == key_ref {
// # Safety
// occupied is less than the capacity of vec_ptr.
unsafe {
std::ptr::copy_nonoverlapping(
&raw_entry.row_ptr as *const RowPtr,
vec_ptr.add(occupied),
1,
)
};
unsafe { std::ptr::write(vec_ptr.add(occupied), raw_entry.row_ptr) };
occupied += 1;
}
}
Expand Down
Loading
Loading