From 42186e01d74c32d287979318870cbfb3a971d403 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Mon, 24 Jun 2024 07:53:14 -0400 Subject: [PATCH 1/3] `deps`: enable `uuid` `v7` feature --- Cargo.lock | 18 ++++++++++++++---- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eaf49e074..67a876234 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -709,6 +709,15 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4790f9e8961209112beb783d85449b508673cf4a6a419c8449b210743ac4dbe9" +[[package]] +name = "atomic" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994" +dependencies = [ + "bytemuck", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -1252,7 +1261,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" dependencies = [ "crossterm", - "strum 0.26.3", + "strum 0.26.2", "strum_macros 0.26.4", "unicode-width", ] @@ -4812,7 +4821,7 @@ dependencies = [ "smartstring", "snap", "strsim", - "strum 0.26.3", + "strum 0.26.2", "strum_macros 0.26.4", "sysinfo", "tabwriter", @@ -6029,9 +6038,9 @@ dependencies = [ [[package]] name = "strum" -version = "0.26.3" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ "phf 0.10.1", ] @@ -6655,6 +6664,7 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ea73390fe27785838dcbf75b91b1d84799e28f1ce71e6f372a5dc2200c80de5" dependencies = [ + "atomic", "getrandom", ] diff --git a/Cargo.toml b/Cargo.toml index 0bc137b06..91e50c924 100644 
--- a/Cargo.toml +++ b/Cargo.toml @@ -221,7 +221,7 @@ thousands = { version = "0.2", optional = true } threadpool = "1.8" titlecase = { version = "3", optional = true } tokio = { version = "1", features = ["rt-multi-thread"] } -uuid = { version = "1", features = ["v4"] } +uuid = { version = "1", features = ["v4", "v7"] } url = "2.5" vader_sentiment = { version = "0.1", optional = true } whatlang = { version = "0.16", optional = true } From 18b3998bdf918ee6813688cb28244324870d8dd8 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Mon, 24 Jun 2024 07:58:14 -0400 Subject: [PATCH 2/3] `enum`: add `--uuid7` option --- src/cmd/enumerate.rs | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/cmd/enumerate.rs b/src/cmd/enumerate.rs index c2109ce47..2255f0527 100644 --- a/src/cmd/enumerate.rs +++ b/src/cmd/enumerate.rs @@ -8,8 +8,11 @@ The enum function has four modes of operation: 1. INCREMENT. Add an incremental identifier to each of the lines: $ qsv enum file.csv - 2. UUID. Add a uuid v4 to each of the lines: - $ qsv enum --uuid file.csv + 2. UUID4. Add a uuid v4 to each of the lines: + $ qsv enum --uuid4 file.csv + + 3. UUID7. Add a uuid v7 to each of the lines: + $ qsv enum --uuid7 file.csv 3. CONSTANT. Create a new column filled with a given value: $ qsv enum --constant 0 @@ -48,9 +51,14 @@ enum options: To specify a null value, pass the literal "". --copy Name of a column to copy. Changes the default column name to "{column}_copy". - --uuid When set, the column will be populated with + --uuid4 When set, the column will be populated with uuids (v4) instead of the incremental identifier. - Changes the default column name to "uuid". + Changes the default column name to "uuid4". + --uuid7 When set, the column will be populated with + uuids (v7) instead of the incremental identifier. + uuid v7 is a time-based uuid and is monotonically increasing. 
+ See https://buildkite.com/blog/goodbye-integers-hello-uuids + Changes the default column name to "uuid7". --hash Create a new column filled with the hash of the given column/s. Use "1-" to hash all columns. Changes the default column name to "hash". @@ -91,7 +99,8 @@ struct Args { flag_increment: Option, flag_constant: Option, flag_copy: Option, - flag_uuid: bool, + flag_uuid4: bool, + flag_uuid7: bool, flag_hash: Option, flag_output: Option, flag_no_headers: bool, @@ -100,7 +109,8 @@ struct Args { enum EnumOperation { Increment, - Uuid, + Uuid4, + Uuid7, Constant, Copy, Hash, @@ -168,8 +178,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> { if !rconfig.no_headers { if let Some(column_name) = &args.flag_new_column { headers.push_field(column_name.as_bytes()); - } else if args.flag_uuid { - headers.push_field(b"uuid"); + } else if args.flag_uuid4 { + headers.push_field(b"uuid4"); + } else if args.flag_uuid7 { + headers.push_field(b"uuid7"); } else if args.flag_constant.is_some() { headers.push_field(b"constant"); } else if copy_operation { @@ -205,8 +217,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> { let enum_operation = if args.flag_constant.is_some() { EnumOperation::Constant - } else if args.flag_uuid { - EnumOperation::Uuid + } else if args.flag_uuid4 { + EnumOperation::Uuid4 + } else if args.flag_uuid7 { + EnumOperation::Uuid7 } else if copy_operation { EnumOperation::Copy } else if args.flag_hash.is_some() { @@ -231,7 +245,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { record.push_field(itoa_buffer.format(counter).as_bytes()); counter += increment; }, - EnumOperation::Uuid => { + EnumOperation::Uuid4 => { let id = Uuid::new_v4(); record.push_field( id.as_hyphenated() @@ -239,6 +253,14 @@ pub fn run(argv: &[&str]) -> CliResult<()> { .as_bytes(), ); }, + EnumOperation::Uuid7 => { + let id = Uuid::now_v7(); + record.push_field( + id.as_hyphenated() + .encode_lower(&mut Uuid::encode_buffer()) + .as_bytes(), + ); + }, EnumOperation::Constant => { 
record.push_field(constant_value); }, From 36bd00f9e0eefb3c77b6e20cb530e62eb7bcd6a4 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Mon, 24 Jun 2024 08:07:20 -0400 Subject: [PATCH 3/3] `enum`: correct shuffling example using uuid4 --- src/cmd/enumerate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd/enumerate.rs b/src/cmd/enumerate.rs index 2255f0527..7b565bc6d 100644 --- a/src/cmd/enumerate.rs +++ b/src/cmd/enumerate.rs @@ -31,7 +31,7 @@ The enum function has four modes of operation: Finally, note that you should also be able to shuffle the lines of a CSV file by sorting on the generated uuids: - $ qsv enum --uuid file.csv | qsv sort -s uuid > shuffled.csv + $ qsv enum --uuid4 file.csv | qsv sort -s uuid4 > shuffled.csv Usage: qsv enum [options] []