Skip to content

Commit

Permalink
Add hash_remove, vec_qselect, and avl_extra tests
Browse files Browse the repository at this point in the history
Improve wordcount test to break ties lexicographically when sorting
most frequent words
  • Loading branch information
hacatu committed Feb 11, 2024
1 parent 1c7ea86 commit 4546f4c
Show file tree
Hide file tree
Showing 6 changed files with 363 additions and 6 deletions.
189 changes: 189 additions & 0 deletions src/test/avl_extra.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <crater/avl_check.h>
#include <crater/avl.h>
#include <crater/sla.h>

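/*
In this test, we first create an avl tree of uint8_t's node-by-node.
This tree will have all even numbers from 2 to 2*96 inclusive, but they
are inserted out of order.
Then we iterate over the tree using cr8r_avl_last and cr8r_avl_prev to check
it has the correct contents in the correct order.
Then we call cr8r_avl_lower_bound and cr8r_avl_upper_bound for every number on [0, 2*97)
(notice that some of these are not in the tree), and cr8r_avl_search for every
number on [1, 2*97).
After that, every element is raised by 29 with cr8r_avl_increase and lowered back
with cr8r_avl_decrease, checking the inorder traversal after each pass.
Finally the nodes are relabeled 0 through 95, the comparison function is swapped
for cmp_u8_p5m97 and the tree is rebuilt with cr8r_avl_reorder, and cr8r_avl_delete
is run with a recording callback to check that it visits every node.
*/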

static cr8r_avl_ft avlft;

// Three-way comparison of two uint8_t keys by their fifth powers modulo 97.
// gcd(5, 96) = 1, so x -> x^5 mod 97 permutes [0, 97); the result is a fixed,
// scrambled-looking total order on those values. Returns -1, 0, or 1.
static int cmp_u8_p5m97(const cr8r_base_ft*, const void *_a, const void *_b){
	const uint64_t x = *(const uint8_t*)_a;
	const uint64_t y = *(const uint8_t*)_b;
	// 255^5 fits comfortably in 64 bits, so the power is taken before reducing
	const uint64_t x2 = x*x, y2 = y*y;
	const uint64_t key_x = x2*x2*x%97;
	const uint64_t key_y = y2*y2*y%97;
	return (key_x > key_y) - (key_x < key_y);
}

// Bitset with one bit per payload value 0..97; bit 97 doubles as the
// "anything out of range" marker set by record_ents.
static uint8_t seen_ents[(98 + 7)/8];

// Node callback installed as avlft.free before cr8r_avl_delete: instead of
// releasing the node, it marks the node's uint8_t payload in seen_ents.
// Payloads above 97 are clamped to 97 so they stay inside the bitset.
static void record_ents(cr8r_base_ft*, void *node){
	cr8r_avl_node *avl_node = node;
	uint8_t value = *(uint8_t*)avl_node->data;
	uint8_t slot = value > 97 ? 97 : value;
	seen_ents[slot/8] |= 1ull << (slot%8);
}

/*
Runs every avl_extra check in sequence, printing a message on stderr for each
problem found.
NOTE(review): only a failed insertion changes the exit status (exit(1)); all
other failures are reported on stderr and the program still returns 0.
*/
int main(){
	// sla backs the node allocations (via cr8r_default_alloc_sla) and is
	// handed to cr8r_sla_delete automatically when main's scope ends
	cr8r_sla sla [[gnu::cleanup(cr8r_sla_delete)]] = {};
	cr8r_sla_init(&sla, offsetof(cr8r_avl_node, data) + sizeof(uint8_t), 97);
	cr8r_avl_ft_init(&avlft, &sla, sizeof(uint8_t), cr8r_default_cmp_u8, NULL, cr8r_default_alloc_sla, cr8r_default_free_sla);
	cr8r_avl_node *root = NULL;
	for(uint64_t n = 50;;){
		uint8_t _n = n;
		if(!cr8r_avl_insert(&root, &_n, &avlft)){
			fprintf(stderr, "\e[1;31mERROR: avl_insert failed somehow!\e[0m\n");
			exit(1);
		}
		n = n/2*5%97*2; // 5 is a generator modulo 97, so repeatedly multiplying any power of 5 mod 97 will generate all numbers besides 0
		if(n == 50){
			break;
		}
	}
	// so now the avl tree should have all the even numbers from 2 to 192 inclusive
	uint8_t n = 96*2;
	for(cr8r_avl_node *it = cr8r_avl_last(root); it; it = cr8r_avl_prev(it)){
		if(n != *(uint8_t*)it->data){
			fprintf(stderr, "\e[1;31mFailure: reverse inorder traversal is not correct!\e[0m\n");
		}
		n -= 2;
	}
	// keys below the minimum: nothing is <= n, and the first element above n is 2
	for(n = 0; n < 2; ++n){
		cr8r_avl_node *it = cr8r_avl_lower_bound(root, &n, &avlft);
		if(it){
			fprintf(stderr, "\e[1;31mFailure: avl_lower_bound returned bogus node for n < min (should be NULL)\e[0m\n");
		}
		it = cr8r_avl_upper_bound(root, &n, &avlft);
		if(!it ||*(uint8_t*)it->data != 2){
			fprintf(stderr, "\e[1;31mFailure: avl_upper_bound returned wrong node for %"PRIu8"\e[0m\n", n);
		}
	}
	// keys inside the range: lower_bound is expected to be the even number at or
	// just below n, upper_bound the next even number strictly above n
	for(n = 2; n < 96*2; ++n){
		cr8r_avl_node *it = cr8r_avl_lower_bound(root, &n, &avlft);
		if(!it || *(uint8_t*)it->data/2 != n/2){
			fprintf(stderr, "\e[1;31mFailure: avl_lower_bound returned wrong node for %"PRIu8"\e[0m\n", n);
		}
		it = cr8r_avl_upper_bound(root, &n, &avlft);
		if(!it || *(uint8_t*)it->data/2 != n/2 + 1){
			fprintf(stderr, "\e[1;31mFailure: avl_upper_bound returned wrong node for %"PRIu8"\e[0m\n", n);
		}
	}
	// keys at or above the maximum: lower_bound is the maximum, nothing is above n
	for(n = 96*2; n < 97*2; ++n){
		cr8r_avl_node *it = cr8r_avl_lower_bound(root, &n, &avlft);
		if(!it || *(uint8_t*)it->data != 96*2){
			fprintf(stderr, "\e[1;31mFailure: avl_lower_bound returned wrong node for %"PRIu8"\e[0m\n", n);
		}
		it = cr8r_avl_upper_bound(root, &n, &avlft);
		if(it){
			fprintf(stderr, "\e[1;31mFailure: avl_upper_bound returned bogus node for n >= max (should be NULL)\e[0m\n");
		}
	}
	// search must always land on a node: the exact one for even keys, one of
	// the two neighbouring even values for odd keys
	for(n = 1; n < 97*2; ++n){
		cr8r_avl_node *it = cr8r_avl_search(root, &n, &avlft);
		if(!it){
			fprintf(stderr, "\e[1;31mFailure: avl_search returned NULL\e[0m\n");
		}else if(n&1){
			uint8_t m = *(uint8_t*)it->data;
			if(m + 1 != n && n + 1 != m){
				fprintf(stderr, "\e[1;31mFailure: avl_search failed\e[0m\n");
			}
		}else if(*(uint8_t*)it->data != n){
			fprintf(stderr, "\e[1;31mFailure: avl_search failed\e[0m\n");
		}
	}
	// raise every element by 29 in place (even -> odd, so no collisions with
	// elements not yet processed) and let avl_increase restore the ordering
	for(n = 2; n < 97*2; n += 2){
		cr8r_avl_node *it = cr8r_avl_get(root, &n, &avlft);
		if(!it){
			fprintf(stderr, "\e[1;31mFailure: couldn't get node to increase\e[0m\n");
		}else{
			*(uint8_t*)it->data += 29;
			if(!(it = cr8r_avl_increase(it, &avlft, NULL))){
				fprintf(stderr, "\e[1;31mFailure: avl_increase falsely detected duplicate\e[0m\n");
			}else{
				root = it;
				CR8R_AVL_ASSERT_ALL(root);
			}
		}
	}
	n = 2 + 29;
	for(cr8r_avl_node *it = cr8r_avl_first(root); it; it = cr8r_avl_next(it)){
		if(n != *(uint8_t*)it->data){
			fprintf(stderr, "\e[1;31mFailure: avl_increase broke the tree!\e[0m\n");
		}
		n += 2;
	}
	// undo the shift with avl_decrease; 97*2 + 29 still fits in a uint8_t
	for(n = 2 + 29; n < 97*2 + 29; n += 2){
		cr8r_avl_node *it = cr8r_avl_get(root, &n, &avlft);
		if(!it){
			fprintf(stderr, "\e[1;31mFailure: couldn't get node to decrease\e[0m\n");
		}else{
			*(uint8_t*)it->data -= 29;
			if(!(it = cr8r_avl_decrease(it, &avlft, NULL))){
				fprintf(stderr, "\e[1;31mFailure: avl_decrease falsely detected duplicate\e[0m\n");
			}else{
				root = it;
				CR8R_AVL_ASSERT_ALL(root);
			}
		}
	}
	n = 2;
	for(cr8r_avl_node *it = cr8r_avl_first(root); it; it = cr8r_avl_next(it)){
		if(n != *(uint8_t*)it->data){
			fprintf(stderr, "\e[1;31mFailure: avl_decrease broke the tree!\e[0m\n");
		}
		n += 2;
	}
	// relabel the nodes 0, 1, 2, ... in inorder, then switch to the scrambled
	// comparison so the existing layout is no longer sorted and must be reordered
	n = 0;
	for(cr8r_avl_node *it = cr8r_avl_first(root); it; it = cr8r_avl_next(it)){
		*(uint8_t*)it->data = n++;
	}
	avlft.cmp = cmp_u8_p5m97;
	cr8r_avl_reorder(root, &avlft);
	CR8R_AVL_ASSERT_ALL(root);
	n = 0;
	for(cr8r_avl_node *it = cr8r_avl_first(root), *pd = NULL; it; pd = it, it = cr8r_avl_next(it)){
		if(pd && cmp_u8_p5m97(&avlft.base, pd->data, it->data) >= 0){
			fprintf(stderr, "\e[1;31mFailure: avl_reorder did not restore order!\e[0m\n");
		}
		++n;
	}
	if(n != 96){
		fprintf(stderr, "\e[1;31mFailure: avl_reorder'd tree has %"PRIu8" nodes instead of 96\e[0m\n", n);
	}
	// 0^5 mod 97 is 0, the smallest possible key under cmp_u8_p5m97
	if((n = *(uint8_t*)cr8r_avl_first(root)->data)){
		fprintf(stderr, "\e[1;31mFailure: avl_reorder made %"PRIu8" the first element instead of 0\e[0m\n", n);
	}
	// delete with a recording callback, then confirm every label 0..95 was
	// visited and that nothing out of range (clamped to bit 97) showed up
	avlft.free = record_ents;
	cr8r_avl_delete(root, &avlft);
	for(n = 0; n < 96; ++n){
		if(!(seen_ents[n/8] & (1ull << (n%8)))){
			fprintf(stderr, "\e[1;31mFailure: avl_delete missed node %"PRIu8"!\e[0m\n", n);
		}
	}
	n = 97;
	if(seen_ents[n/8] & (1ull << (n%8))){
		fprintf(stderr, "\e[1;31mFailure: avl_delete encountered some extradimensional node :O\e[0m\n");
	}
}

89 changes: 89 additions & 0 deletions src/test/set_diff.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <crater/hash.h>
#include <crater/vec.h>

// Function table passed to every cr8r_hash_* call in this test: entries are
// int64_t values, compared with cr8r_default_cmp_i64 and hashed with
// cr8r_default_hash_u64, with the load_factor field set to 0.5.
// NOTE(review): hashing signed keys with the u64 hash is presumably fine
// because only the bit pattern matters - confirm against the library.
static cr8r_hashtbl_ft ft = {
.base.size = sizeof(int64_t),
.cmp = cr8r_default_cmp_i64,
.hash = cr8r_default_hash_u64,
.load_factor = 0.5
};

// Function table for the vector that collects removed values: int64_t
// elements, the library's default growth/resize/swap callbacks, and
// cr8r_default_cmp_i64 as the ordering used by cr8r_vec_sort.
static cr8r_vec_ft vecft_i64 = {
.base.size = sizeof(int64_t),
.new_size = cr8r_default_new_size,
.resize = cr8r_default_resize,
.cmp = cr8r_default_cmp_i64,
.swap = cr8r_default_swap
};

/*
Inserts 0, 1, 2, ... into a hash table while removing entries out of table_b
whenever that secondary table exists, until 7 values have been removed that way.
The remaining entries are then drained, and the test checks that the removed
values, once sorted, are exactly 0 .. n-1 where n is the number of insertions.
Exits with status 1 on any allocation failure or mismatch.
*/
int main(){
	cr8r_hashtbl_t numbers;
	if(!cr8r_hash_init(&numbers, &ft, 8)){
		fprintf(stderr, "\e[1;31mERROR: Could not allocate hashtable\e[0m\n");
		exit(1);
	}
	cr8r_vec removed;
	if(!cr8r_vec_init(&removed, &vecft_i64, 64)){
		fprintf(stderr, "\e[1;31mERROR: Could not allocate vector\e[0m\n");
		exit(1);
	}
	int64_t *rm_it = NULL;
	int64_t n;
	for(n = 0; removed.len < 7; ++n){
		// start walking table_b as soon as it appears
		if(numbers.table_b && !rm_it){
			rm_it = numbers.table_b;
		}
		if(rm_it){
			if(rm_it == numbers.table_b && numbers.flags_b[0]&0x100000000ULL){// we are in the first slot and it is occupied
				if(!cr8r_vec_pushr(&removed, &vecft_i64, rm_it)){
					fprintf(stderr, "\e[1;31mERROR: Could not allocate memory\e[0m\n");
					exit(1);
				}
				int64_t tmp = *rm_it;
				cr8r_hash_remove(&numbers, &ft, &tmp);// force re-search for rm_it, to confirm remove_split works
			}else{
				rm_it = cr8r_hash_next(&numbers, &ft, rm_it);
				if((void*)rm_it <= numbers.table_b || (numbers.table_a > numbers.table_b && (void*)rm_it >= numbers.table_a)){// we are done with table_b
					while(numbers.table_b){
						cr8r_hash_get(&numbers, &ft, &n);// force the hash table to finish the incremental move process if needed
					}
					rm_it = NULL;
				}else{
					if(!cr8r_vec_pushr(&removed, &vecft_i64, rm_it)){
						fprintf(stderr, "\e[1;31mERROR: Could not allocate memory\e[0m\n");
						exit(1);
					}
					int64_t tmp = *rm_it;
					cr8r_hash_remove(&numbers, &ft, &tmp);// force re-search for rm_it, to confirm remove_split works
				}
			}
		}
		if(!cr8r_hash_insert(&numbers, &ft, &n, NULL)){
			fprintf(stderr, "\e[1;31mERROR: Could not allocate memory\e[0m\n");
			exit(1);
		}
	}
	// drain whatever is still in the table, recording each value before deleting it
	for(rm_it = cr8r_hash_next(&numbers, &ft, NULL); rm_it; rm_it = cr8r_hash_next(&numbers, &ft, rm_it)){
		if(!cr8r_vec_pushr(&removed, &vecft_i64, rm_it)){
			fprintf(stderr, "\e[1;31mERROR: Could not allocate memory\e[0m\n");
			exit(1);
		}
		cr8r_hash_delete(&numbers, &ft, rm_it);
	}
	cr8r_hash_destroy(&numbers, &ft);
	// Exactly n values (0 .. n-1) were inserted, so exactly n must have come
	// back out. Checking the count first keeps the loop below from indexing
	// past removed.len when entries were lost, and catches surplus entries
	// that the first n sorted slots alone would not reveal.
	if((uint64_t)removed.len != (uint64_t)n){
		fprintf(stderr, "\e[1;31mERROR: hash insert/remove returned %"PRIu64" values but %"PRIi64" were inserted!\e[0m\n", (uint64_t)removed.len, n);
		exit(1);
	}
	cr8r_vec_sort(&removed, &vecft_i64);
	for(int64_t i = 0; i < n; ++i){
		if(i != *(int64_t*)cr8r_vec_get(&removed, &vecft_i64, i)){
			fprintf(stderr, "\e[1;31mERROR: hash insert/remove didn't preserve values!\e[0m\n");
			exit(1);
		}
	}
	fprintf(stderr, "\e[1;32mSuccess: hash insert/remove worked as expected!\e[0m\n");
	cr8r_vec_delete(&removed, &vecft_i64);
}

58 changes: 58 additions & 0 deletions src/test/vec_qselect.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,54 @@ static int test_pivot_mm(cr8r_vec *vec, cr8r_vec_ft *ft, cr8r_prng *prng){
return 1;
}

// Nonzero when x and y are both strictly greater or both strictly less than
// pivot, i.e. when pivot cannot be the median of the three values.
static int m3_both_on_one_side(uint64_t pivot, uint64_t x, uint64_t y){
	return (x > pivot && y > pivot) || (x < pivot && y < pivot);
}

// Exercises cr8r_vec_pivot_m3 on a vector of uint64_t.
// Phase 1 (1000 shuffles): the pivot returned for the whole vector must be the
// first, middle, or last element and must not lie strictly outside the other two.
// Phase 2 (100 shuffles): a 2-element range must yield the first element, and
// an empty or out-of-bounds range must yield NULL.
// Returns 1 if every trial passes, 0 (after printing a message) otherwise.
static int test_pivot_m3(cr8r_vec *vec, cr8r_vec_ft *ft, cr8r_prng *prng){
	for(uint64_t trial = 0; trial < 1000; ++trial){
		cr8r_vec_shuffle(vec, ft, prng);
		uint64_t *pivot = cr8r_vec_pivot_m3(vec, ft, 0, vec->len);
		uint64_t *first = vec->buf;
		uint64_t *last = first + (vec->len - 1);
		uint64_t *middle = first + (vec->len - 1)/2;
		// work out which two candidates the pivot has to sit between
		uint64_t *other_a, *other_b;
		if(pivot == first){
			other_a = last;
			other_b = middle;
		}else if(pivot == middle){
			other_a = first;
			other_b = last;
		}else if(pivot == last){
			other_a = first;
			other_b = middle;
		}else{
			fprintf(stderr, "\e[1;31mInvalid pivot on trial %"PRIu64"/1000 (not first, middle, or last)!\e[0m\n", trial);
			return 0;
		}
		if(m3_both_on_one_side(*pivot, *other_a, *other_b)){
			fprintf(stderr, "\e[1;31mInvalid pivot on trial %"PRIu64"/1000!\e[0m\n", trial);
			return 0;
		}
	}
	for(uint64_t trial = 0; trial < 100; ++trial){
		cr8r_vec_shuffle(vec, ft, prng);
		uint64_t *pivot = cr8r_vec_pivot_m3(vec, ft, 0, 2);
		if(pivot != vec->buf){
			fprintf(stderr, "\e[1;31mDidn't select first element as pivot in short vec on trial %"PRIu64"/100!\e[0m\n", trial);
			return 0;
		}
		pivot = cr8r_vec_pivot_m3(vec, ft, 0, 0);
		if(pivot){
			fprintf(stderr, "\e[1;31mDidn't recognize empty range on trial %"PRIu64"/100!\e[0m\n", trial);
			return 0;
		}
		pivot = cr8r_vec_pivot_m3(vec, ft, vec->len, vec->len + 1);
		if(pivot){
			fprintf(stderr, "\e[1;31mDidn't recognize out of bound range on trial %"PRIu64"/100!\e[0m\n", trial);
			return 0;
		}
	}
	return 1;
}

static int test_partition(cr8r_vec *vec, cr8r_vec_ft *ft, cr8r_prng *prng){
for(uint64_t i = 0; i < 1000; ++i){
cr8r_vec_shuffle(vec, ft, prng);
Expand Down Expand Up @@ -124,6 +172,16 @@ int main(){
fprintf(stderr, "\e[1;31mcr8r_vec_pivot_mm failed!\e[0m\n");
}

fprintf(stderr, "\e[1;34mTesting cr8r_vec_pivot_m3 1000x on a 1000 element array\e[0m\n");
status = test_pivot_m3(&vec, &ft, prng);
++tested;
if(status){
fprintf(stderr, "\e[1;32mcr8r_vec_pivot_m3 succeeded!\e[0m\n");
++passed;
}else{
fprintf(stderr, "\e[1;31mcr8r_vec_pivot_m3 failed!\e[0m\n");
}

fprintf(stderr, "\e[1;34mTesting cr8r_vec_ith 1000x on a 1000 element array\e[0m\n");
status = test_ith(&vec, &ft, prng);
++tested;
Expand Down
17 changes: 16 additions & 1 deletion src/test/wordcount.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ int combine_u64_then_free(cr8r_base_ft *ft, void *_e, void *_i){
}

// Orders two word_count records primarily by their count fields. When the
// counts compare equal, falls back to the negated string comparison of the
// word fields, so records with the same count still have a deterministic
// order (alphabetical tie-breaking) instead of depending on input order.
int cmp_counts(const cr8r_base_ft *ft, const void *_a, const void *_b){
	const char *lhs = _a;
	const char *rhs = _b;
	int by_count = cr8r_default_cmp_u64(ft, lhs + offsetof(word_count, count), rhs + offsetof(word_count, count));
	if(by_count){
		return by_count;
	}
	return -cr8r_default_cmp_cstr(ft, lhs + offsetof(word_count, word), rhs + offsetof(word_count, word));
}

cr8r_hashtbl_ft htft_wc = {
Expand Down Expand Up @@ -159,6 +161,19 @@ int main(int argc, char **argv){
munmap(text_buf, text_len);

fprintf(stderr, "\e[1;34mHash table found %"PRIu64" unique words\e[0m\n", wordcount_ht.full);
uint64_t avl_len = 0;
for(cr8r_avl_node *it = cr8r_avl_first(wordcount_avl); it; it = cr8r_avl_next(it)){
++avl_len;
word_count *wc = cr8r_hash_get(&wordcount_ht, &htft_wc, it->data);
if(!wc){
fprintf(stderr, "\e[1;31m\"%s\" found in avl wordcount but not hash table!\e[0m\n", ((word_count*)it->data)->word);
}else if(wc->count != ((word_count*)it->data)->count){
fprintf(stderr, "\e[1;31mavl/hash table counts did not match for \"%s\"\e[0m\n", wc->word);
}
}
if(avl_len != wordcount_ht.full){
fprintf(stderr, "\e[1;31mAvl tree found different number of words (%"PRIu64")\e[0m\n", avl_len);
}
cr8r_vec wordcount_vec;
if(!cr8r_vec_init(&wordcount_vec, &vecft_wc, wordcount_ht.full)){
cr8r_hash_destroy(&wordcount_ht, &htft_wc);
Expand Down
Loading

0 comments on commit 4546f4c

Please sign in to comment.