Skip to content
This repository has been archived by the owner on May 23, 2024. It is now read-only.

Commit

Permalink
use xor filters instead of bloom filters (#145)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ozgrakkurt authored Feb 21, 2023
1 parent 6be4333 commit b4d21e3
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 270 deletions.
31 changes: 12 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion core/src/deserialize.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::result::Result as StdResult;
#[derive(Debug, Clone, derive_more::Deref, derive_more::From, PartialEq, Eq)]
pub struct Bytes32(pub Box<[u8; 32]>);

#[derive(Debug, Clone, derive_more::Deref, derive_more::From, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, derive_more::Deref, derive_more::From, PartialEq, Eq)]
pub struct Address(pub Box<[u8; 20]>);

#[derive(Debug, Clone, derive_more::Deref, derive_more::From, PartialEq, Eq)]
Expand Down
4 changes: 1 addition & 3 deletions worker/Cargo.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,9 +22,7 @@ futures = "0.3"
mimalloc = { version = "0.1.34", default-features = false }
crossbeam-channel = "0.5"
derive_more = "0.99"
fnv = "1"
bv = { version = "0.11", features = [ "serde" ] }
rand = "0.8"
xorf = { version = "0.8.1", features = ["serde"] }

eth-archive-core = { path = "../core" }

Expand Down
224 changes: 0 additions & 224 deletions worker/src/bloom.rs

This file was deleted.

13 changes: 9 additions & 4 deletions worker/src/data_ctx.rs
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
use crate::config::Config;
use crate::db::DbHandle;
use crate::db_writer::DbWriter;
use crate::db_writer::{hash_addr, DbWriter};
use crate::field_selection::FieldSelection;
use crate::serialize_task::SerializeTask;
use crate::types::{MiniLogSelection, MiniQuery, MiniTransactionSelection, Query};
Expand All @@ -23,6 +23,7 @@ use std::path::Path;
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::{cmp, io};
use xorf::Filter;

pub struct DataCtx {
config: Config,
Expand Down Expand Up @@ -219,7 +220,7 @@ impl DataCtx {

let address = address
.iter()
.filter(|addr| parquet_idx.contains(addr))
.filter(|addr| parquet_idx.contains(&hash_addr(addr.as_slice())))
.cloned()
.collect::<Vec<_>>();

Expand All @@ -242,7 +243,9 @@ impl DataCtx {
Some(source) if !source.is_empty() => {
let source = source
.iter()
.filter(|addr| parquet_idx.contains(addr))
.filter(|addr| {
parquet_idx.contains(&hash_addr(addr.as_slice()))
})
.cloned()
.collect::<Vec<_>>();
if source.is_empty() {
Expand All @@ -258,7 +261,9 @@ impl DataCtx {
Some(dest) if !dest.is_empty() => {
let dest = dest
.iter()
.filter(|addr| parquet_idx.contains(addr))
.filter(|addr| {
parquet_idx.contains(&hash_addr(addr.as_slice()))
})
.cloned()
.collect::<Vec<_>>();
if dest.is_empty() {
Expand Down
4 changes: 1 addition & 3 deletions worker/src/db.rs
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
use crate::bloom::Bloom as BloomFilter;
use crate::types::MiniQuery;
use crate::{Error, Result};
use eth_archive_core::deserialize::Address;
Expand All @@ -15,7 +14,6 @@ use std::sync::Arc;
use std::time::Instant;
use std::{cmp, iter, mem};

pub type Bloom = BloomFilter<Address>;
pub type ParquetIdxIter<'a> = Box<dyn Iterator<Item = Result<(DirName, ParquetIdx)>> + 'a>;

pub struct DbHandle {
Expand Down Expand Up @@ -559,7 +557,7 @@ mod cf_name {
pub const ALL_CF_NAMES: [&str; 5] = [BLOCK, TX, LOG, LOG_TX, PARQUET_IDX];
}

pub type ParquetIdx = Bloom;
pub type ParquetIdx = xorf::BinaryFuse16;

fn log_tx_key(block_number: u32, transaction_index: u32) -> [u8; 8] {
let mut key = [0; 8];
Expand Down
Loading

0 comments on commit b4d21e3

Please sign in to comment.