Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Serialization for khash map #76

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6800c6f
Add writing
dnbaker Sep 27, 2016
cbeb0bc
Merge branch 'master' of https://github.com/noseatbelts/klib
dnbaker Sep 27, 2016
061969d
Add write/load functions.
dnbaker Nov 6, 2016
3277f05
Update khash.h
dnbaker Mar 18, 2017
494da2e
Update write/read functions to work for both sets and maps.
dnbaker Mar 18, 2017
a6755a4
Merge branch 'master' of https://github.com/attractivechaos/klib
dnbaker Apr 27, 2017
9f5e92e
Add kputuw_, kputw_, and kputl_ functions (which don't set 0).
dnbaker Apr 27, 2017
4375b21
Eliminate -Wsign-compare.
dnbaker Apr 28, 2017
121adbd
Modify kputuw_.
dnbaker Jun 15, 2017
309d5e5
Add HAS_KPUTUW__ macro for checking for function definition.
dnbaker Jun 15, 2017
1de0874
Merge branch 'master' of https://github.com/attractivechaos/klib
dnbaker Jun 15, 2017
69dc025
Finished moving things around.
dnbaker Jun 15, 2017
8598156
Merge branch 'master' of https://github.com/dnbh/klib
dnbaker Jun 15, 2017
0089a12
Merge branch 'master' of https://github.com/attractivechaos/klib
dnbaker Feb 3, 2018
751f62a
save
dnbaker Apr 30, 2018
3597c24
Merge branch 'master' of https://github.com/noseatbelts/klib
dnbaker Apr 30, 2018
384eea5
Merge branch 'master' of https://github.com/attractivechaos/klib
dnbaker Apr 30, 2018
2327449
Merge branch 'master' of https://github.com/attractivechaos/klib
dnbaker Aug 24, 2018
3e30f52
Allow switch to 64-bit map using -DKH_USE_64_BIT
dnbaker Aug 24, 2018
f6247bb
Eliminate second definition of kh_write.
dnbaker Aug 24, 2018
718511e
Save changes.
dnbaker Aug 24, 2018
3122f7e
Save stuff.
dnbaker Aug 24, 2018
8fbee11
Resolve.
dnbaker Aug 24, 2018
6694ef8
Patch.
dnbaker Aug 24, 2018
16d6366
Remove unrelated changes.
dnbaker Aug 25, 2018
f0f7639
Update documentation. Add serialize method.
dnbaker Aug 26, 2018
653e8d8
Merge branch 'master' of https://github.com/attractivechaos/klib
dnbaker Jan 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 88 additions & 8 deletions khash.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,13 @@ typedef unsigned long long khint64_t;
#endif
#endif /* klib_unused */

/* Width of the integer type used for bucket indices and iterators:
   32-bit unless KH_USE_64_BIT evaluates to non-zero at preprocessing time. */
#if !KH_USE_64_BIT
typedef khint32_t khint_t;
#else
typedef khint64_t khint_t;
#endif
/* Iterators are plain bucket indices in either configuration. */
typedef khint_t khiter_t;

#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
Expand All @@ -169,6 +174,7 @@ typedef khint_t khiter_t;
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
#define __ac_fw(item, fp) (fwrite(&(item), 1, sizeof(item), fp))

/* Number of flag words used for m buckets: m>>4 (sixteen buckets per word),
   but never fewer than one word. */
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)

Expand Down Expand Up @@ -199,14 +205,18 @@ static const double __ac_HASH_UPPER = 0.77;
khval_t *vals; \
} kh_##name##_t;

/* Declares, for hash table type `name`, the extern prototypes of the per-type
   functions: lifecycle (init/destroy/clear), lookup and mutation
   (get/resize/put/del) and the binary serialization entry points
   (read/write on an open FILE *, serialize/deserialize on a path).
   khkey_t is used by the get/put prototypes; khval_t is accepted for symmetry
   with the other __KHASH_* macros and is not referenced here. */
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t)                          \
	extern kh_##name##_t *kh_init_##name(void);                             \
	extern void kh_destroy_##name(kh_##name##_t *h);                        \
	extern void kh_clear_##name(kh_##name##_t *h);                          \
	extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key);      \
	extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets);   \
	extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret);  \
	extern void kh_del_##name(kh_##name##_t *h, khint_t x);                 \
	extern kh_##name##_t *kh_deserialize_##name(const char *path);          \
	extern int kh_serialize_##name(kh_##name##_t *h, const char *path);     \
	extern kh_##name##_t *kh_read_##name(kh_##name##_t *dest, FILE *fp);    \
	extern void kh_write_##name(kh_##name##_t *map, FILE *fp);

#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
SCOPE kh_##name##_t *kh_init_##name(void) { \
Expand Down Expand Up @@ -352,7 +362,47 @@ static const double __ac_HASH_UPPER = 0.77;
__ac_set_isdel_true(h->flags, x); \
--h->size; \
} \
}
} \
SCOPE void kh_write_##name(kh_##name##_t *map, FILE *fp) { \
	/* Dump `map` to `fp` as raw bytes: n_buckets, n_occupied, size and   */ \
	/* upper_bound, then the flag words, the key array and, when          */ \
	/* kh_is_map is set, the value array. Everything is written exactly   */ \
	/* as it sits in memory; no byte-order or width conversion is done.   */ \
	/* Nothing is returned, so callers detect failure through ferror(fp). */ \
	__ac_fw(map->n_buckets, fp); \
	__ac_fw(map->n_occupied, fp); \
	__ac_fw(map->size, fp); \
	__ac_fw(map->upper_bound, fp); \
	if (map->n_buckets == 0) { \
		/* With no buckets the arrays may still be NULL (e.g. a zeroed    */ \
		/* struct), yet __ac_fsize(0) is 1. Emit one flag word with every */ \
		/* "empty" bit set instead of reading through map->flags, so the  */ \
		/* stream keeps the __ac_fsize(n_buckets) words a reader expects. */ \
		khint32_t no_buckets = 0xaaaaaaaaU; \
		__ac_fw(no_buckets, fp); \
		return; \
	} \
	fwrite(map->flags, sizeof(khint32_t), __ac_fsize(map->n_buckets), fp); \
	fwrite(map->keys, sizeof(khkey_t), map->n_buckets, fp); \
	if (kh_is_map) fwrite(map->vals, sizeof(khval_t), map->n_buckets, fp); \
} \
SCOPE kh_##name##_t *kh_read_##name(kh_##name##_t *dest, FILE *fp) { \
	/* Fill `dest` from the raw layout kh_write_##name emits: the four    */ \
	/* header fields, __ac_fsize(n_buckets) flag words, n_buckets keys    */ \
	/* and, when kh_is_map is set, n_buckets values.                      */ \
	/* Returns `dest` on success. On a short read or failed allocation    */ \
	/* it frees what it allocated, zeroes the header fields and array     */ \
	/* pointers of `dest`, and returns NULL.                              */ \
	/* Whatever dest->flags/keys/vals held on entry is overwritten, not   */ \
	/* freed, so `dest` should not own storage when this is called.       */ \
	/* Header values are taken from the stream without validation.        */ \
	size_t n_flags; \
	dest->flags = NULL; \
	dest->keys = NULL; \
	dest->vals = NULL; \
	if (fread(&dest->n_buckets, sizeof(dest->n_buckets), 1, fp) != 1 || \
	    fread(&dest->n_occupied, sizeof(dest->n_occupied), 1, fp) != 1 || \
	    fread(&dest->size, sizeof(dest->size), 1, fp) != 1 || \
	    fread(&dest->upper_bound, sizeof(dest->upper_bound), 1, fp) != 1) \
		goto kh_read_fail; \
	n_flags = __ac_fsize(dest->n_buckets); \
	dest->flags = (khint32_t *)malloc(sizeof(khint32_t) * n_flags); \
	if (dest->flags == NULL || \
	    fread(dest->flags, sizeof(khint32_t), n_flags, fp) != n_flags) \
		goto kh_read_fail; \
	/* malloc(0) may legitimately return NULL, so an empty table stops    */ \
	/* here with keys/vals left NULL instead of being treated as failure. */ \
	if (dest->n_buckets == 0) return dest; \
	dest->keys = (khkey_t *)malloc(sizeof(khkey_t) * dest->n_buckets); \
	if (dest->keys == NULL || \
	    fread(dest->keys, sizeof(khkey_t), dest->n_buckets, fp) != (size_t)dest->n_buckets) \
		goto kh_read_fail; \
	if (kh_is_map) { \
		dest->vals = (khval_t *)malloc(sizeof(khval_t) * dest->n_buckets); \
		if (dest->vals == NULL || \
		    fread(dest->vals, sizeof(khval_t), dest->n_buckets, fp) != (size_t)dest->n_buckets) \
			goto kh_read_fail; \
	} \
	return dest; \
kh_read_fail: \
	free(dest->flags); \
	free(dest->keys); \
	free(dest->vals); \
	dest->flags = NULL; \
	dest->keys = NULL; \
	dest->vals = NULL; \
	dest->n_buckets = dest->n_occupied = dest->size = dest->upper_bound = 0; \
	return NULL; \
} \
SCOPE int kh_serialize_##name(kh_##name##_t *h, const char *path) \
{ \
	/* Write table `h` to the file at `path` (created or truncated,       */ \
	/* binary mode) using kh_write_##name.                                */ \
	/* Returns 0 on success and -1 if the file cannot be opened, a write  */ \
	/* error is flagged on the stream, or closing the file fails.         */ \
	FILE *fp; \
	int failed; \
	if ((fp = fopen(path, "wb")) == NULL) return -1; \
	kh_write_##name(h, fp); \
	/* kh_write_##name returns nothing; the stream error flag is the only */ \
	/* record of a failed fwrite, and fclose can fail while flushing.     */ \
	failed = ferror(fp); \
	if (fclose(fp) != 0) failed = 1; \
	return failed ? -1 : 0; \
} \
SCOPE kh_##name##_t *kh_deserialize_##name(const char *path) \
{ \
	/* Allocate a zeroed table and fill it from the file at `path`        */ \
	/* (opened in binary mode) via kh_read_##name.                        */ \
	/* Returns the new table, which the caller owns, or NULL when the     */ \
	/* file cannot be opened, the table struct cannot be allocated, or    */ \
	/* kh_read_##name returns NULL.                                       */ \
	FILE *fp; \
	kh_##name##_t *ret; \
	/* Open first so a missing file costs no allocation. */ \
	if ((fp = fopen(path, "rb")) == NULL) return NULL; \
	ret = (kh_##name##_t *)calloc(1, sizeof(kh_##name##_t)); \
	if (ret != NULL && kh_read_##name(ret, fp) == NULL) { \
		free(ret); \
		ret = NULL; \
	} \
	fclose(fp); \
	return ret; \
}

#define KHASH_DECLARE(name, khkey_t, khval_t) \
__KHASH_TYPE(name, khkey_t, khval_t) \
Expand Down Expand Up @@ -490,6 +540,36 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key)
*/
#define kh_del(name, h, k) kh_del_##name(h, k)

/*! @function
@abstract Read a hash table in raw binary form from a file pointer.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table to fill [khash_t(name)*]
@param fp File pointer, opened for reading [FILE *]
@return The value returned by kh_read_##name [khash_t(name)*]
*/
#define kh_read(name, h, fp) kh_read_##name(h, fp)

/*! @function
@abstract Write a hash table in raw binary form to a file pointer.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param fp File pointer, opened for writing [FILE *]
*/
#define kh_write(name, h, fp) kh_write_##name(h, fp)

/*! @function
@abstract Load a hash table from a file on disk.
@param name Name of the hash table [symbol]
@param path Path to file from which to load [const char *]
@return Pointer to a newly allocated hash table [khash_t(name)*]
*/

#define kh_deserialize(name, path) kh_deserialize_##name(path)

/*! @function
@abstract Write a hash table to a file on disk.
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param path Path to file to which to serialize [const char *]
@return 0 on success; -1 if the file could not be opened [int]
*/

#define kh_serialize(name, h, path) kh_serialize_##name(h, path)

/*! @function
@abstract Test whether a bucket contains data.
@param h Pointer to the hash table [khash_t(name)*]
Expand Down
2 changes: 1 addition & 1 deletion kstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ int kvsprintf(kstring_t *s, const char *fmt, va_list ap)
va_copy(args, ap);
l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'.
va_end(args);
if (l + 1 > s->m - s->l) {
if ((unsigned)(l + 1) > s->m - s->l) {
s->m = s->l + l + 2;
kroundup32(s->m);
s->s = (char*)realloc(s->s, s->m);
Expand Down
69 changes: 69 additions & 0 deletions kstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,29 @@ static inline int kputw(int c, kstring_t *s)
return 0;
}


/*
 * Appends the decimal text of `c` (with a leading '-' when negative) to `s`
 * without writing a terminating NUL, for callers that will keep appending.
 * Returns 0 on success, or EOF if the buffer could not be grown; on failure
 * `s` is left exactly as it was.
 */
static inline int kputw_(int c, kstring_t *s)
{
	char buf[16];
	int i, l = 0;
	unsigned int x = c;
	/* Negate in unsigned arithmetic so the most negative int is handled too. */
	if (c < 0) x = -x;
	/* Digits are produced least-significant first and reversed on output. */
	do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
	if (c < 0) buf[l++] = '-';
	if (s->l + l + 1 >= s->m) {
		char *tmp;
		size_t new_m = s->l + l + 2;
		kroundup32(new_m);
		/* Commit the new capacity only once realloc has succeeded, so a
		 * failed grow never leaves s->m larger than the real allocation. */
		if ((tmp = (char*)realloc(s->s, new_m)) == NULL)
			return EOF;
		s->s = tmp;
		s->m = new_m;
	}
	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
	return 0;
}


static inline int kputuw(unsigned c, kstring_t *s)
{
char buf[16];
Expand All @@ -241,6 +264,30 @@ static inline int kputuw(unsigned c, kstring_t *s)
return 0;
}

#ifndef HAS_KPUTUW__
#define HAS_KPUTUW__
/*
 * Appends the decimal text of `c` to `s` without writing a terminating NUL,
 * for callers that will keep appending.
 * Returns 0 on success, or EOF if the buffer could not be grown; on failure
 * `s` is left exactly as it was.
 * HAS_KPUTUW__ lets other code test whether this function is defined.
 */
static inline int kputuw_(unsigned c, kstring_t *s)
{
	char buf[16];
	int l = 0, i;
	unsigned x = c;
	/* do/while emits "0" for zero, so zero takes the same growth and return
	 * path as every other value instead of being delegated to kputc. */
	do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
	if (s->l + l + 1 >= s->m) {
		char *tmp;
		size_t new_m = s->l + l + 2;
		kroundup32(new_m);
		/* Commit the new capacity only once realloc has succeeded, so a
		 * failed grow never leaves s->m larger than the real allocation. */
		if ((tmp = (char*)realloc(s->s, new_m)) == NULL)
			return EOF;
		s->s = tmp;
		s->m = new_m;
	}
	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
	return 0;
}
#endif


static inline int kputl(long c, kstring_t *s)
{
char buf[32];
Expand All @@ -263,6 +310,28 @@ static inline int kputl(long c, kstring_t *s)
return 0;
}


static inline int kputl_(long c, kstring_t *s)
Copy link

@justinmk justinmk Aug 24, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This duplicates a lot of code and it's not clear how it's related to the serialization.

kputl could call kputl_.

Copy link
Contributor Author

@dnbaker dnbaker Aug 24, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unrelated to serialization. I added this so that I'd have the option of integer formatting routines which did not null-terminate for cases where I knew I would be appending to the string further. I can separate that out from this pull request later if requested. Compare kputw and kputw_ which were already present, for example. I'm not quite sure I understand why only some types were provided separate functions.

{
	/*
	 * Appends the decimal text of `c` (with a leading '-' when negative) to
	 * `s` without writing a terminating NUL. Returns 0 on success, or EOF if
	 * the buffer could not be grown; on failure `s` is left as it was.
	 */
	char buf[32];
	int i, l = 0;
	unsigned long x = c;
	/* Negate in unsigned arithmetic so the most negative long is handled too. */
	if (c < 0) x = -x;
	/* Digits are produced least-significant first and reversed on output. */
	do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
	if (c < 0) buf[l++] = '-';
	if (s->l + l + 1 >= s->m) {
		char *tmp;
		size_t new_m = s->l + l + 2;
		kroundup32(new_m);
		/* Commit the new capacity only once realloc has succeeded, so a
		 * failed grow never leaves s->m larger than the real allocation. */
		if ((tmp = (char*)realloc(s->s, new_m)) == NULL)
			return EOF;
		s->s = tmp;
		s->m = new_m;
	}
	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
	return 0;
}

/*
* Returns 's' split by delimiter, with *n being the number of components;
* NULL on failure.
Expand Down