Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: sync y-octo's logic #563

Merged
merged 14 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions apps/doc_merger/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ fn jwst_merge(path: &str, output: &str) {
let mut doc = Doc::default();
for (i, update) in updates.iter().enumerate() {
println!("apply update{i} {} bytes", update.len());
doc.apply_update_from_binary(update.clone()).unwrap();
doc.apply_update_from_binary_v1(update.clone()).unwrap();
println!("status: {:?}", doc.store_status());
}
let ts = Instant::now();
Expand All @@ -93,7 +93,7 @@ fn jwst_merge(path: &str, output: &str) {

{
let mut doc = Doc::default();
doc.apply_update_from_binary(binary.clone()).unwrap();
doc.apply_update_from_binary_v1(binary.clone()).unwrap();
let new_binary = doc.encode_update_v1().unwrap();
let new_json = serde_json::to_string_pretty(&doc.get_map("space:blocks").unwrap()).unwrap();
assert_json_diff::assert_json_eq!(doc.get_map("space:blocks").unwrap(), json);
Expand Down
2 changes: 1 addition & 1 deletion apps/keck/src/server/api/blocks/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ impl From<(&str, &History)> for BlockHistory {
fn from((workspace_id, history): (&str, &History)) -> Self {
Self {
workspace_id: workspace_id.into(),
field_name: history.field_name.clone(),
field_name: history.field_name.as_ref().map(|s| s.to_string()),
parent: history.parent.iter().map(|id| id.to_string()).collect::<Vec<_>>(),
content: history.content.clone(),
action: history.action.to_string(),
Expand Down
7 changes: 7 additions & 0 deletions libs/jwst-codec-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
default = ["merger"]
bench = ["regex"]
fuzz = ["arbitrary", "phf"]
merger = ["clap", "jwst-codec/large_refs"]

[dependencies]
arbitrary = { version = "1.3", features = ["derive"], optional = true }
clap = { version = "4.4", features = ["derive"], optional = true }
phf = { version = "0.11", features = ["macros"], optional = true }
rand_chacha = "0.3"
regex = { version = "1.9", optional = true }
Expand Down Expand Up @@ -59,5 +62,9 @@ name = "text_ops_benchmarks"
harness = false
name = "update_benchmarks"

[[bench]]
harness = false
name = "apply_benchmarks"

[lib]
bench = true
35 changes: 35 additions & 0 deletions libs/jwst-codec-utils/benches/apply_benchmarks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
mod utils;

use std::time::Duration;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;

/// Benchmarks decoding a v1 update and applying it to a fresh `yrs` document,
/// once per loaded fixture file.
///
/// Throughput is reported in bytes, based on the size of each fixture's content.
fn apply(c: &mut Criterion) {
    use yrs::{updates::decoder::Decode, Doc, Transact, Update};

    let fixtures = Files::load();

    let mut group = c.benchmark_group("apply");
    group.measurement_time(Duration::from_secs(15));

    for fixture in &fixtures.files {
        let size_in_bytes = fixture.content.len() as u64;
        group.throughput(Throughput::Bytes(size_in_bytes));

        let id = BenchmarkId::new("apply with yrs", fixture.path.name_str());
        group.bench_with_input(id, &fixture.content, |bencher, bytes| {
            // Each iteration decodes the update and applies it to a brand-new document.
            bencher.iter(|| {
                let update = Update::decode_v1(bytes).unwrap();
                let doc = Doc::new();
                doc.transact_mut().apply_update(update);
            });
        });
    }

    group.finish();
}

// Registers `apply` as the only benchmark of the `benches` group and hands that
// group to Criterion's `criterion_main!`.
// NOTE(review): `criterion_main!` presumably generates the `main` function, which
// is why the bench target is declared with `harness = false` — confirm.
criterion_group!(benches, apply);
criterion_main!(benches);
100 changes: 100 additions & 0 deletions libs/jwst-codec-utils/bin/doc_merger.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use std::{
fs::read,
io::{Error, ErrorKind},
path::PathBuf,
time::Instant,
};

use clap::Parser;
use y_octo::Doc;

// Command-line arguments of the merger binary; `main` parses them and passes
// `path` on to `jwst_merge`.
// NOTE(review): the `///` lines below are presumably picked up by clap's derive
// as help text, so rewording them likely changes the `--help` output — confirm
// before editing them.
/// ybinary merger
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
/// Path of the ybinary to read
// Ends up in `load_path`, which accepts either a single file or a directory of files.
#[arg(short, long)]
path: String,
}

/// Reads the update binaries located at `path`.
///
/// * If `path` is a directory, every regular file directly inside it is read in
///   ascending path order. Sub-directories and directory entries that cannot be
///   read are skipped.
/// * If `path` is a regular file, only that file is read.
///
/// # Errors
///
/// Returns an [`ErrorKind::NotFound`] error when `path` is neither a file nor a
/// directory, and forwards any I/O error raised while listing the directory or
/// reading one of the files.
fn load_path(path: &str) -> Result<Vec<Vec<u8>>, Error> {
    let root = PathBuf::from(path);

    // Handle the non-directory cases up front.
    if !root.is_dir() {
        return if root.is_file() {
            Ok(vec![read(root)?])
        } else {
            Err(Error::new(ErrorKind::NotFound, "not a file or directory"))
        };
    }

    // Gather the regular files of the directory; `flatten` drops failed entries.
    let mut files = Vec::new();
    for entry in root.read_dir()?.flatten() {
        let candidate = entry.path();
        if candidate.is_file() {
            files.push(candidate);
        }
    }
    // Sort so the updates are returned in a deterministic order.
    files.sort();

    // Collecting into `Result` stops at the first file that fails to read.
    files
        .into_iter()
        .map(|file| {
            println!("read {:?}", file);
            read(file)
        })
        .collect()
}

fn main() {
let args = Args::parse();
jwst_merge(&args.path);
}

/// Applies every update found at `path` to a single document and prints
/// statistics about the result.
///
/// The steps are, in order:
/// 1. load the updates via `load_path` and apply them one by one,
/// 2. wait for a line on stdin,
/// 3. parse the document history, printing the elapsed time and at most the
///    first 100 entries,
/// 4. run garbage collection and encode the merged document,
/// 5. apply the merged binary to a fresh document and re-encode it, printing
///    both sizes.
///
/// # Panics
///
/// Panics if the updates cannot be loaded, if any update fails to apply, if
/// stdin cannot be read, or if garbage collection or encoding fails.
fn jwst_merge(path: &str) {
    let updates = load_path(path).unwrap();

    let mut doc = Doc::default();
    // The buffers are never used again after being applied, so consume them
    // instead of cloning every update.
    for (i, update) in updates.into_iter().enumerate() {
        println!("apply update{i} {} bytes", update.len());
        doc.apply_update_from_binary_v1(update).unwrap();
    }

    // NOTE(review): presumably a pause that lets a profiler or debugger be
    // attached before the history is parsed — confirm.
    println!("press enter to continue");
    std::io::stdin().read_line(&mut String::new()).unwrap();

    let ts = Instant::now();
    let history = doc.history().parse_store(Default::default());
    println!("history: {:?}", ts.elapsed());
    for entry in history.iter().take(100) {
        println!("history: {:?}", entry);
    }

    doc.gc().unwrap();

    let binary = doc.encode_update_v1().unwrap();
    println!("merged {} bytes", binary.len());

    // Round-trip: apply the merged binary to a fresh document and encode again.
    // `binary` is not needed afterwards, so it is moved rather than cloned.
    let mut reloaded = Doc::default();
    reloaded.apply_update_from_binary_v1(binary).unwrap();
    let new_binary = reloaded.encode_update_v1().unwrap();
    println!("re-encoded {} bytes", new_binary.len());
}

#[cfg(test)]
mod tests {
use super::*;

// Manual debugging aid: runs the whole merge pipeline against a local dump.
// It is marked `#[ignore]`, and the absolute path below is hard-coded, so it
// has to be adjusted to a location that exists on the machine running it.
// Note that `jwst_merge` also waits for a line on stdin before continuing.
#[test]
#[ignore = "only for debug"]
fn test_gc() {
jwst_merge("/Users/ds/Downloads/out");
}
}
2 changes: 1 addition & 1 deletion libs/jwst-codec-utils/fuzz/fuzz_targets/apply_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ fuzz_target!(|crdt_params: Vec<CRDTParam>| {

let trx = doc.transact_mut();
let binary_from_yrs = trx.encode_update_v1().unwrap();
let doc = jwst_codec::Doc::new_from_binary(binary_from_yrs.clone()).unwrap();
let doc = jwst_codec::Doc::new_from_binary(binary_from_yrs).unwrap();
let binary = doc.encode_update_v1().unwrap();
assert_eq!(binary, binary_from_yrs);
});
4 changes: 2 additions & 2 deletions libs/jwst-codec-utils/fuzz/fuzz_targets/codec_doc_any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ use jwst_codec::{Any, CrdtRead, CrdtWrite, RawDecoder, RawEncoder};
use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
if let Ok(any) = Any::read(&mut RawDecoder::new(data.to_vec())) {
if let Ok(any) = Any::read(&mut RawDecoder::new(data)) {
// ensure that decoding and re-encoding produce the same result
let mut buffer = RawEncoder::default();
if let Err(e) = any.write(&mut buffer) {
panic!("Failed to write message: {:?}, {:?}", any, e);
}
if let Ok(any2) = Any::read(&mut RawDecoder::new(buffer.into_inner())) {
if let Ok(any2) = Any::read(&mut RawDecoder::new(&buffer.into_inner())) {
assert_eq!(any, any2);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ fuzz_target!(|data: Vec<Any>| {
if let Err(e) = any.write(&mut buffer) {
panic!("Failed to write message: {:?}, {:?}", any, e);
}
if let Ok(any2) = Any::read(&mut RawDecoder::new(buffer.into_inner())) {
if let Ok(any2) = Any::read(&mut RawDecoder::new(&buffer.into_inner())) {
assert_eq!(any, any2);
}
}
Expand All @@ -35,7 +35,7 @@ fuzz_target!(|data: Vec<Any>| {
if let Err(e) = any.write(&mut buffer) {
panic!("Failed to write message: {:?}, {:?}", any, e);
}
if let Ok(any2) = Any::read(&mut RawDecoder::new(buffer.into_inner())) {
if let Ok(any2) = Any::read(&mut RawDecoder::new(&buffer.into_inner())) {
assert_eq!(any, any2);
}
}
Expand Down
2 changes: 1 addition & 1 deletion libs/jwst-codec-utils/src/doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ mod tests {

let binary_from_yrs = trx.encode_update_v1().unwrap();

let doc = Doc::new_from_binary(binary_from_yrs.clone()).unwrap();
let doc = Doc::try_from_binary_v1(&binary_from_yrs).unwrap();
let binary = doc.encode_update_v1().unwrap();

assert_eq!(binary_from_yrs, binary);
Expand Down
6 changes: 6 additions & 0 deletions libs/jwst-codec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ahash = "0.8"
bitvec = "1.0"
byteorder = "1.5"
nom = "7.1"
ordered-float = "4.1"
rand_chacha = "0.3"
rand_distr = "0.4"
smol_str = "0.2"

# ======= workspace dependencies =======
nanoid = { workspace = true }
Expand Down Expand Up @@ -77,5 +79,9 @@ name = "text_ops_benchmarks"
harness = false
name = "update_benchmarks"

[[bench]]
harness = false
name = "apply_benchmarks"

[lib]
bench = true
34 changes: 34 additions & 0 deletions libs/jwst-codec/benches/apply_benchmarks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
mod utils;

use std::time::Duration;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;

/// Benchmarks applying a v1 update binary to a fresh document, once per loaded
/// fixture file.
///
/// Throughput is reported in bytes, based on the size of each fixture's content.
fn apply(c: &mut Criterion) {
    let fixtures = Files::load();

    let mut group = c.benchmark_group("apply");
    group.measurement_time(Duration::from_secs(15));

    for fixture in &fixtures.files {
        let size_in_bytes = fixture.content.len() as u64;
        group.throughput(Throughput::Bytes(size_in_bytes));

        let id = BenchmarkId::new("apply with jwst", fixture.path.name_str());
        group.bench_with_input(id, &fixture.content, |bencher, bytes| {
            // Each iteration builds a new document and applies a copy of the update;
            // the result is returned so it is handed back to the bencher.
            bencher.iter(|| {
                use y_octo::*;
                let mut doc = Doc::new();
                doc.apply_update_from_binary_v1(bytes.clone()).unwrap()
            });
        });
    }

    group.finish();
}

// Registers `apply` as the only benchmark of the `benches` group and hands that
// group to Criterion's `criterion_main!`.
// NOTE(review): `criterion_main!` presumably generates the `main` function, which
// is why the bench target is declared with `harness = false` — confirm.
criterion_group!(benches, apply);
criterion_main!(benches);
2 changes: 1 addition & 1 deletion libs/jwst-codec/benches/update_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ fn update(c: &mut Criterion) {
|b, content| {
b.iter(|| {
use jwst_codec::*;
let mut decoder = RawDecoder::new(content.clone());
let mut decoder = RawDecoder::new(content);
Update::read(&mut decoder).unwrap()
});
},
Expand Down
6 changes: 3 additions & 3 deletions libs/jwst-codec/src/doc/codec/any.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, fmt, ops::RangeInclusive};
use std::{fmt, ops::RangeInclusive};

use ordered_float::OrderedFloat;

Expand Down Expand Up @@ -600,7 +600,7 @@ mod tests {
any.write(&mut encoder).unwrap();
let encoded = encoder.into_inner();

let mut decoder = RawDecoder::new(encoded);
let mut decoder = RawDecoder::new(&encoded);
let decoded = Any::read(&mut decoder).unwrap();

assert_eq!(any, decoded);
Expand All @@ -615,7 +615,7 @@ mod tests {
any.write(&mut encoder).unwrap();
let encoded = encoder.into_inner();

let mut decoder = RawDecoder::new(encoded);
let mut decoder = RawDecoder::new(&encoded);
let decoded = Any::read(&mut decoder).unwrap();

assert_eq!(any, &decoded);
Expand Down
6 changes: 4 additions & 2 deletions libs/jwst-codec/src/doc/codec/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ impl Content {
match self {
Self::Deleted(len) => *len,
Self::Json(strings) => strings.len() as u64,
Self::String(string) => string.encode_utf16().count() as u64,
// TODO: need a custom wrapper with the length cached; recomputing it here costs too much
Self::String(string) => string.chars().map(|c| c.len_utf16()).sum::<usize>() as u64,
Self::Any(any) => any.len() as u64,
Self::Binary(_) | Self::Embed(_) | Self::Format { .. } | Self::Type(_) | Self::Doc { .. } => 1,
}
Expand Down Expand Up @@ -283,8 +284,9 @@ mod tests {
let mut writer = RawEncoder::default();
writer.write_u8(content.get_info())?;
content.write(&mut writer)?;
let update = writer.into_inner();

let mut reader = RawDecoder::new(writer.into_inner());
let mut reader = RawDecoder::new(&update);
let tag_type = reader.read_u8()?;
assert_eq!(Content::read(&mut reader, tag_type)?, *content);

Expand Down
Loading
Loading