Skip to content

Commit

Permalink
Merge dbreader + dbwriter + switch to die macro
Browse files Browse the repository at this point in the history
  • Loading branch information
btrkeks committed May 3, 2024
1 parent d98691b commit cd618e9
Show file tree
Hide file tree
Showing 15 changed files with 341 additions and 379 deletions.
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ DEBUG_CFLAGS=-DDEBUG \
-Og -ggdb
RELEASE_CFLAGS=-O3 -flto -march=native

FILES=dictpopup.c util.c platformdep.c deinflector.c settings.c dbreader.c ankiconnectc.c database.c jppron.c pdjson.c
FILES_H=ankiconnectc.h dbreader.h deinflector.h gtk3popup.h settings.h util.h platformdep.h database.h jppron.h pdjson.h
FILES=dictpopup.c util.c platformdep.c deinflector.c settings.c db.c ankiconnectc.c database.c jppron.c pdjson.c
FILES_H=ankiconnectc.h db.h deinflector.h gtk3popup.h settings.h util.h platformdep.h database.h jppron.h pdjson.h
SRC=$(addprefix $(SDIR)/,$(FILES))
SRC_H=$(addprefix $(IDIR)/,$(FILES_H))

Expand All @@ -38,8 +38,8 @@ CFLAGS_CREATE=-I$(IDIR) -isystem$(LIBDIR)/lmdb/libraries/liblmdb -D_POSIX_C_SOUR
LDLIBS_CREATE=-ffunction-sections -fdata-sections -Wl,--gc-sections \
-lzip $(shell pkg-config --libs glib-2.0) -llmdb

FILES_CREATE=dbwriter.c pdjson.c util.c settings.c
FILES_H_CREATE=dbwriter.h pdjson.h util.h buf.h settings.h
FILES_CREATE=db.c pdjson.c util.c settings.c
FILES_H_CREATE=db.h pdjson.h util.h buf.h settings.h

SRC_CREATE=$(addprefix $(SDIR)/,$(FILES_CREATE)) $(LMDB_FILES)
SRC_H_CREATE=$(addprefix $(IDIR)/,$(FILES_H_CREATE))
Expand Down
40 changes: 40 additions & 0 deletions include/db.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef DP_DB_H
#define DP_DB_H

#include <stdbool.h>

#include <lmdb.h>

#include "util.h"

// A (not so) opaque struct
// (the full definition of struct database_s is at the bottom of this header)
typedef struct database_s database_t;

/*
 * db_open: Opens the LMDB environment at @dbpath and begins one transaction
 * which stays open until db_close(). With @readonly set, the sub-databases
 * must already exist; otherwise they are created if missing.
 * db_close: Aborts (read-only) or commits (writable) that transaction and
 * closes the environment.
 */
database_t _nonnull_ db_open(char *dbpath, bool readonly);
void _nonnull_ db_close(database_t *db);

/*
 * db_put_dictent: Stores @de and makes it retrievable under @headword.
 * Requires a database opened with readonly == false.
 * db_get_dictents: Adds every entry stored under @headword to @dict (via
 * dictionary_add). The strings of the added entries are newly allocated.
 */
void _nonnull_ db_put_dictent(database_t *db, s8 headword, dictentry de);
void _nonnull_ db_get_dictents(database_t *db, s8 headword, dictentry *dict[static 1]);

/*
 * db_put_freq: Stores @freq for the pair (@word, @reading).
 * Requires a database opened with readonly == false.
 * db_get_freq: Returns the frequency stored for (@word, @reading), or -1 if
 * there is none.
 */
void _nonnull_ db_put_freq(database_t *db, s8 word, s8 reading, u32 freq);
int _nonnull_ db_get_freq(database_t *db, s8 word, s8 reading);


/*
* Checks if there exists a database in the provided path, i.e. whether a
* readable file "data.mdb" is found inside the directory @dbpath.
*
* Returns: 1 if such a file is accessible, 0 otherwise
*/
i32 db_check_exists(s8 dbpath);


/*
* State of one opened database (see db_open / db_close).
*/
struct database_s {
// LMDB environment handle
MDB_env *env;
// "db1": word -> id
MDB_dbi dbi1;
// "db2": id -> dictdef
MDB_dbi dbi2;
// "db3": word+reading -> frequency
MDB_dbi dbi3;
// The single transaction that is kept open between db_open and db_close
MDB_txn *txn;
// Serialized form of the most recently stored dictentry. Only initialized
// when the database is opened for writing.
stringbuilder_s lastdatastr;
// Id of the most recently stored dictentry
u32 last_id;
// Whether the database was opened in read-only mode
bool readonly;
};

#endif
17 changes: 0 additions & 17 deletions include/dbreader.h

This file was deleted.

7 changes: 0 additions & 7 deletions include/dbwriter.h

This file was deleted.

33 changes: 22 additions & 11 deletions include/dictpopup.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
#ifndef DP_DICTPOPUP_H
#define DP_DICTPOPUP_H

#include "util.h"

// (Not so) opaque type: struct dictpopup_s is defined further down in this header
typedef struct dictpopup_s dictpopup_t;


dictpopup_t dictpopup_init(int argc, char **argv);

/*
* Looks up @lookup in the database and returns all corresponding dictentries in
* a buffer (see include/buf.h)
*/
dictentry * _nonnull_ create_dictionary(dictpopup_t *d);


void create_ankicard(dictpopup_t d, dictentry de);


#define POSSIBLE_ENTRIES_S_NMEMB 9
typedef struct possible_entries_s {
s8 lookup;
Expand All @@ -13,16 +32,8 @@ typedef struct possible_entries_s {
s8 dictname;
} possible_entries_s;

typedef struct dictpopup_s {
struct dictpopup_s {
possible_entries_s pe;
} dictpopup_s;

dictpopup_s dictpopup_init(int argc, char **argv);

/*
* Looks up @lookup in the database and returns all corresponding dictentries in
* a buffer (see include/buf.h)
*/
dictentry *create_dictionary(dictpopup_s d[static 1]);
};

void create_ankicard(dictpopup_s d, dictentry de);
#endif
11 changes: 4 additions & 7 deletions include/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ typedef char byte;
__builtin_unreachable()

#define _drop_(x) __attribute__((__cleanup__(drop_##x)))
#define _nonnull_ __attribute__((nonnull))
#define _printf_(a, b) __attribute__((__format__(printf, a, b)))

#define arrlen(x) \
Expand All @@ -33,10 +34,9 @@ void *xrealloc(void *ptr, size_t size);
#define new(type, num) xcalloc(num, sizeof(type))

/* ------------------- Start s8 utils ---------------- */
#define countof(a) (size)(sizeof(a) / sizeof(*(a)))
#define lengthof(s) (countof("" s "") - 1)
#define lengthof(s) (arrlen("" s "") - 1)
#define s8(s) \
{ (u8 *)s, countof(s) - 1 }
{ (u8 *)s, arrlen(s) - 1 }
#define S(s) (s8) s8(s)

typedef struct {
Expand Down Expand Up @@ -178,10 +178,6 @@ s8 nuke_whitespace(s8 z);
func(*pp); \
}

static inline void drop_frees8(s8 *str) {
free(str->s);
}

static inline void drop_close(int *fd) {
if (*fd >= 0) {
close(*fd);
Expand All @@ -191,5 +187,6 @@ static inline void drop_close(int *fd) {
DEFINE_DROP_FUNC_VOID(free)
DEFINE_DROP_FUNC(FILE *, fclose)
DEFINE_DROP_FUNC(DIR *, closedir)
DEFINE_DROP_FUNC_PTR(s8, frees8)

#endif
199 changes: 199 additions & 0 deletions src/db.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
#include <string.h>

#include "db.h"
#include "messages.h"
#include "util.h"

/*
 * Evaluates the LMDB call (or an already obtained return code) @call exactly
 * once and passes the condition "result != MDB_SUCCESS" together with LMDB's
 * error description to die_on (presumably terminating the program - see
 * messages.h).
 *
 * The temporary is deliberately not named `rc`: this file also uses the macro
 * as C(rc), and `int rc = (rc);` would read an uninitialized variable.
 */
#define C(call) \
do { \
int _rc = (call); \
die_on(_rc != MDB_SUCCESS, "Database error: %s", mdb_strerror(_rc)); \
} while (0)

/*
 * Opens the LMDB environment located at @dbpath and starts the single
 * transaction that stays open until db_close().
 *
 * @dbpath:   Directory containing (or, in write mode, receiving) the database
 * @readonly: If true, the environment and transaction are opened read-only
 *            and the three sub-databases must already exist. If false, the
 *            sub-databases are created on demand.
 *
 * Every LMDB failure is routed through the C() macro.
 *
 * Returns: The initialized handle. Must be released with db_close().
 */
database_t db_open(char *dbpath, bool readonly) {
    database_t db = {.readonly = readonly};

    C(mdb_env_create(&db.env));
    // Has to happen before mdb_env_open(); one slot per named sub-database
    C(mdb_env_set_maxdbs(db.env, 3));

    if (readonly) {
        C(mdb_env_open(db.env, dbpath, MDB_RDONLY | MDB_NOLOCK | MDB_NORDAHEAD, 0664));
        C(mdb_txn_begin(db.env, NULL, MDB_RDONLY, &db.txn));

        C(mdb_dbi_open(db.txn, "db1", MDB_DUPSORT | MDB_DUPFIXED, &db.dbi1));
        C(mdb_dbi_open(db.txn, "db2", MDB_INTEGERKEY, &db.dbi2));
        C(mdb_dbi_open(db.txn, "db3", 0, &db.dbi3));
    } else {
        // mdb_env_set_mapsize() takes a size_t
        const size_t mapsize = 2097152000; // 2000 MiB (~2 GB)
        C(mdb_env_set_mapsize(db.env, mapsize));

        C(mdb_env_open(db.env, dbpath, 0, 0664));
        C(mdb_txn_begin(db.env, NULL, 0, &db.txn));

        // word -> id
        C(mdb_dbi_open(db.txn, "db1", MDB_DUPSORT | MDB_DUPFIXED | MDB_CREATE, &db.dbi1));
        // id -> dictdef
        C(mdb_dbi_open(db.txn, "db2", MDB_INTEGERKEY | MDB_CREATE, &db.dbi2));
        // word+reading -> frequency
        C(mdb_dbi_open(db.txn, "db3", MDB_CREATE, &db.dbi3));

        // Only needed when writing: remembers the last stored entry so that
        // db_put_dictent() can detect consecutive identical entries
        db.lastdatastr = sb_init(200);
    }

    return db;
}

/*
 * Finishes the transaction opened by db_open() - committing it for a writable
 * database, aborting it for a read-only one - and closes all LMDB handles.
 * The handle fields of @db are reset afterwards.
 *
 * NOTE(review): db->lastdatastr (set up by db_open() in write mode) does not
 * appear to be released here - confirm whether that is intended.
 */
void db_close(database_t *db) {
    if (!db->readonly) {
        C(mdb_txn_commit(db->txn));
    } else {
        mdb_txn_abort(db->txn);
    }

    mdb_dbi_close(db->env, db->dbi1);
    mdb_dbi_close(db->env, db->dbi2);
    mdb_dbi_close(db->env, db->dbi3);
    mdb_env_close(db->env);

    // Reset the handles
    db->txn = NULL;
    db->env = NULL;
    db->dbi1 = 0;
    db->dbi2 = 0;
    db->dbi3 = 0;
}

// TODO: This might be a little inefficient..
/*
 * Serializes @de into one string of the form
 *   dictname \0 kanji \0 reading \0 definition
 * (the inverse of data_to_dictent).
 *
 * TODO: This might be a little inefficient..
 */
static s8 dictent_to_datastr(dictentry de) {
    s8 nul = S("\0");
    s8 serialized = concat(de.dictname, nul, de.kanji, nul, de.reading, nul, de.definition);
    return serialized;
}

/*
 * Stores @de in the database and makes it retrievable under @headword.
 *
 * The entry itself is only written (under a fresh id) if its serialized form
 * differs from the previously stored one, so consecutive calls with the same
 * entry but different headwords share a single id.
 *
 * @db: Database opened with readonly == false
 *
 * A headword that is already linked to the id (MDB_KEYEXIST) or that LMDB
 * cannot use as a key (MDB_BAD_VALSIZE) is skipped; any other database error
 * is fatal.
 */
void db_put_dictent(database_t *db, s8 headword, dictentry de) {
    die_on(db->readonly, "Cannot put dictentry into db in readonly mode.");

    MDB_val key_mdb = {.mv_data = headword.s, .mv_size = headword.len};
    // Points at db->last_id itself, so it follows the increment below
    MDB_val id_mdb = {.mv_data = &db->last_id, .mv_size = sizeof(db->last_id)};

    // NOTE(review): released on scope exit; assumes sb_set() copies the bytes
    // rather than taking ownership of the buffer - confirm against util.c
    _drop_(frees8) s8 datastr = dictent_to_datastr(de);

    if (!s8equals(datastr, sb_gets8(db->lastdatastr))) {
        db->last_id++;
        MDB_val val_mdb = {.mv_data = datastr.s, .mv_size = datastr.len};

        C(mdb_put(db->txn, db->dbi2, &id_mdb, &val_mdb, MDB_NOOVERWRITE | MDB_APPEND));

        sb_set(&db->lastdatastr, datastr);
    }

    // Add key with corresponding id
    int rc = mdb_put(db->txn, db->dbi1, &key_mdb, &id_mdb, MDB_NODUPDATA);
    if (rc != MDB_KEYEXIST && rc != MDB_BAD_VALSIZE) {
        // Anything else (e.g. a full map) must not be silently dropped
        C(rc);
    }
}

/*
 * Looks up all ids stored under @word in the word -> id database.
 *
 * @num: Receives the number of returned ids (0 if @word is not found)
 *
 * Returns: A newly allocated array of *num ids which the caller has to free,
 *          or NULL if @word is not in the database.
 */
static u32 *get_ids(database_t *db, s8 word, size_t *num) {
    // Keep the out-parameter well defined on the not-found path as well
    *num = 0;

    MDB_val key_mdb = {.mv_data = word.s, .mv_size = (size_t)word.len};
    MDB_val val_mdb = {0};

    MDB_cursor *cursor;
    C(mdb_cursor_open(db->txn, db->dbi1, &cursor));

    int rc = mdb_cursor_get(cursor, &key_mdb, &val_mdb, MDB_SET);
    if (rc == MDB_NOTFOUND) {
        mdb_cursor_close(cursor);
        return NULL;
    }
    C(rc);
    // This reads up to a page, i.e. max 1024 entries, which should be enough
    C(mdb_cursor_get(cursor, &key_mdb, &val_mdb, MDB_GET_MULTIPLE));

    mdb_cursor_close(cursor);

    *num = val_mdb.mv_size / sizeof(u32);
    u32 *ret = new (u32, *num);
    memcpy(ret, val_mdb.mv_data, val_mdb.mv_size); // ensures proper alignment
    return ret;
}

/*
* Returns: dictentry with newly allocated strings parsed from @data
*/
/*
 * Splits @data at its NUL separators into the four fields
 * dictname, kanji, reading and definition (in this order) and looks up the
 * frequency for the resulting kanji/reading pair.
 *
 * Returns: dictentry with newly allocated strings parsed from @data
 */
static dictentry data_to_dictent(database_t *db, s8 data) {
    s8 fields[4] = {0};
    s8 rest = data;

    for (size_t idx = 0; idx < arrlen(fields); idx++) {
        assert(rest.len > 0);

        // Length of the current field: up to the next NUL or the end of data
        size field_len = 0;
        for (; field_len < rest.len; field_len++) {
            if (rest.s[field_len] == '\0')
                break;
        }

        fields[idx] = news8(field_len);
        u8copy(fields[idx].s, rest.s, fields[idx].len);

        // Step over the field and its separator
        rest.s += fields[idx].len + 1;
        rest.len -= fields[idx].len + 1;
    }

    dictentry ret = {
        .dictname = fields[0],
        .kanji = fields[1],
        .reading = fields[2],
        .definition = fields[3],
    };
    ret.frequency = db_get_freq(db, ret.kanji, ret.reading);
    return ret;
}

/*
* Returns: Data associated to id from second database.
* Data is valid until closure of db and should not be freed.
* WARNING: returned data is not null-terminated!
*/
/*
 * Fetches the serialized dictentry stored under @id in the id -> dictdef
 * database. A failed lookup is handled by the C() macro.
 *
 * Returns: Data associated to id from second database.
 *          Data is valid until closure of db and should not be freed.
 * WARNING: returned data is not null-terminated!
 */
static s8 getdata(database_t *db, u32 id) {
    MDB_val lookup_key;
    lookup_key.mv_size = sizeof(id);
    lookup_key.mv_data = &id;

    MDB_val found = {0};
    C(mdb_get(db->txn, db->dbi2, &lookup_key, &found));

    s8 ret = {.s = found.mv_data, .len = found.mv_size};
    return ret;
}

/*
 * Looks up @headword and adds every dictentry stored under it to @dict
 * (via dictionary_add). Nothing is added if @headword is unknown.
 * The strings of the added entries are newly allocated.
 */
void db_get_dictents(database_t *db, s8 headword, dictentry *dict[static 1]) {
    size_t count = 0;
    u32 *id_list = get_ids(db, headword, &count);
    if (!id_list)
        return; // Nothing found (and nothing to free)

    for (size_t k = 0; k < count; k++) {
        const s8 raw = getdata(db, id_list[k]);
        dictentry entry = data_to_dictent(db, raw);
        dictionary_add(dict, entry);
    }

    free(id_list);
}

/*
 * Stores @freq under the key "@word \0 @reading" in the frequency database.
 *
 * @db: Database opened with readonly == false
 *
 * Database errors are fatal (see the C() macro).
 */
void db_put_freq(database_t *db, s8 word, s8 reading, u32 freq) {
    die_on(db->readonly, "Cannot put frequency into db in readonly mode.");

    // The key is only needed for the duration of the put, so release it on
    // scope exit instead of leaking it on every call
    _drop_(frees8) s8 key = concat(word, S("\0"), reading);
    MDB_val key_mdb = {.mv_data = key.s, .mv_size = key.len};
    MDB_val val_mdb = {.mv_data = &freq, .mv_size = sizeof(freq)};

    C(mdb_put(db->txn, db->dbi3, &key_mdb, &val_mdb, MDB_NODUPDATA));
}

/*
 * Looks up the frequency stored under the key "@word \0 @reading".
 *
 * Returns: The stored frequency, or -1 if there is no entry for this pair.
 *          Any other database error is fatal (see the C() macro).
 */
int db_get_freq(database_t *db, s8 word, s8 reading) {
    // Released on every return path; previously leaked on each lookup
    _drop_(frees8) s8 key = concat(word, S("\0"), reading);
    MDB_val key_m = {.mv_data = key.s, .mv_size = (size_t)key.len};
    MDB_val val_m = {0};

    int rc = mdb_get(db->txn, db->dbi3, &key_m, &val_m);
    if (rc == MDB_NOTFOUND)
        return -1;
    C(rc);

    int freq;
    assert(sizeof(freq) == val_m.mv_size);
    memcpy(&freq, val_m.mv_data, sizeof(freq)); // ensures proper alignment
    return freq;
}

/*
 * Tests whether a readable LMDB data file ("data.mdb") exists in the
 * directory @dbpath.
 *
 * Returns: 1 if the file can be read, 0 otherwise
 */
i32 db_check_exists(s8 dbpath) {
    _drop_(frees8) s8 dbfile = buildpath(dbpath, S("data.mdb"));
    const char *datafile = (char *)dbfile.s;
    i32 readable = (access(datafile, R_OK) == 0);
    return readable;
}
Loading

0 comments on commit cd618e9

Please sign in to comment.