-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
1,186 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[package] | ||
name = "dbgen" | ||
version = "0.1.0" | ||
authors = ["kennytm <[email protected]>"] | ||
edition = "2018" | ||
|
||
description = "Generate random test cases for databases" | ||
|
||
[dependencies] | ||
structopt = "0.2" | ||
clap = "2.32" | ||
pest = "2.0" | ||
pest_derive = "2.0" | ||
failure = "0.1" | ||
rand = "0.5" | ||
data-encoding = "2.1" | ||
regex-syntax = "0.6" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
Template reference | ||
================== | ||
|
||
File syntax | ||
----------- | ||
|
||
The template file should consist of one CREATE TABLE statement and one INSERT statement, like this: | ||
|
||
```sql | ||
CREATE TABLE _ ( | ||
column_1 COLUMN_TYPE_1, | ||
-- ... | ||
column_n COLUMN_TYPE_N | ||
) OPTION_1 = 1, /*...*/ OPTION_N = N; | ||
|
||
INSERT INTO _ VALUES (1, 2, /*...*/ 'N'); | ||
``` | ||
|
||
The table's name must be `_` (an unquoted underscore). It will be replaced by the real table name | ||
when the data files are written. The INSERT statement should not list the column names. | ||
|
||
Expression syntax | ||
----------------- | ||
|
||
Each value in the INSERT statement can be an expression. `dbgen` will evaluate the expression to | ||
generate a new row when writing them out. | ||
|
||
### Literals | ||
|
||
`dbgen` supports integer, float and string literals. | ||
|
||
* **Integers** | ||
|
||
Decimal and hexadecimal numbers are supported. The value must be between 0 and | ||
2<sup>64</sup> − 1. | ||
|
||
Examples: `0`, `3`, `18446744073709551615`, `0X1234abcd`, `0xFFFFFFFFFFFFFFFF` | ||
|
||
* **Floating point numbers** | ||
|
||
Numbers will be stored in IEEE-754 double-precision format. | ||
|
||
Examples: `0.0`, `1.5`, `.5`, `2.`, `1e100`, `1.38e-23`, `6.02e+23` | ||
|
||
* **Strings** | ||
|
||
Strings must be encoded as UTF-8, and written between single quotes (double-quoted strings are | ||
*not* supported). To represent a single quote in the string, use `''`. | ||
|
||
Examples: `'Hello'`, `'10 o''clock'` | ||
|
||
### Symbols | ||
|
||
* **rownum** | ||
|
||
The current row number. The first row has value 1. | ||
|
||
### Random functions | ||
|
||
* **rand.int(32)** | ||
|
||
Generates a uniform random signed integer with the given number of bits (must be between 1 and | ||
64). | ||
|
||
* **rand.uint(32)** | ||
|
||
Generates a uniform random unsigned integer with the given number of bits (must be between 1 and | ||
64). | ||
|
||
* **rand.regex('[0-9a-z]+', 'i', 100)** | ||
|
||
Generates a random string satisfying the regular expression. The second and third parameters are | ||
optional. If provided, they specify respectively the regex flags, and maximum repeat count for | ||
the unbounded repetition operators (`+`, `*` and `{n,}`). | ||
|
||
The input string should satisfy the syntax of the Rust regex package. The flags argument is a string | ||
composed of these letters: | ||
|
||
* `x` (ignore whitespace) | ||
* `i` (case insensitive) | ||
* `s` (dot matches new-line) | ||
* `u` (enable Unicode mode) | ||
* `a` (disable Unicode mode) | ||
* `o` (recognize octal escapes) | ||
|
||
The flags `m` (multi-line) and `U` (ungreedy) do not affect string generation and are ignored. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
CREATE TABLE _ ( | ||
id INTEGER NOT NULL, | ||
k INTEGER DEFAULT '0' NOT NULL, | ||
PRIMARY KEY (id) | ||
); | ||
INSERT INTO _ VALUES (rownum, rownum); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
CREATE TABLE _( | ||
id INTEGER NOT NULL AUTO_INCREMENT, | ||
k INTEGER DEFAULT '0' NOT NULL, | ||
c CHAR(120) DEFAULT '' NOT NULL, | ||
pad CHAR(60) DEFAULT '' NOT NULL, | ||
PRIMARY KEY(id), | ||
INDEX KEY(k) | ||
); | ||
INSERT INTO _ VALUES ( | ||
rownum, | ||
rand.int(32), | ||
rand.regex('([0-9]{11}-){9}[0-9]{11}'), | ||
rand.regex('([0-9]{11}-){4}[0-9]{11}') | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
edition = "2018" | ||
max_width = 120 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
use dbgen::{quote::Quote, Generator, RngSeed, Template}; | ||
|
||
use data_encoding::{DecodeError, DecodeKind, HEXLOWER_PERMISSIVE}; | ||
use failure::{Error, ResultExt}; | ||
use rand::{EntropyRng, Rng}; | ||
use structopt::StructOpt; | ||
|
||
use std::fs::{create_dir_all, read_to_string, File}; | ||
use std::io::BufWriter; | ||
use std::path::PathBuf; | ||
use std::process::exit; | ||
|
||
#[derive(StructOpt, Debug)] | ||
struct Args { | ||
#[structopt(short = "d", long = "schema-name", help = "Schema name")] | ||
schema_name: String, | ||
|
||
#[structopt(short = "t", long = "table-name", help = "Table name")] | ||
table_name: String, | ||
|
||
#[structopt( | ||
short = "o", | ||
long = "out-dir", | ||
help = "Output directory", | ||
parse(from_os_str) | ||
)] | ||
out_dir: PathBuf, | ||
|
||
#[structopt( | ||
short = "k", | ||
long = "files-count", | ||
help = "Number of files to generate", | ||
default_value = "1" | ||
)] | ||
files_count: u32, | ||
|
||
#[structopt( | ||
short = "n", | ||
long = "inserts-count", | ||
help = "Number of INSERT statements per file" | ||
)] | ||
inserts_count: u32, | ||
|
||
#[structopt( | ||
short = "r", | ||
long = "rows-count", | ||
help = "Number of rows per INSERT statement", | ||
default_value = "1" | ||
)] | ||
rows_count: u32, | ||
|
||
#[structopt( | ||
short = "i", | ||
long = "template", | ||
help = "Generation template SQL", | ||
parse(from_os_str) | ||
)] | ||
template: PathBuf, | ||
|
||
#[structopt( | ||
short = "s", | ||
long = "seed", | ||
help = "Random number generator seed (should have 64 hex digits)", | ||
parse(try_from_str = "seed_from_str") | ||
)] | ||
seed: Option<RngSeed>, | ||
|
||
#[structopt( | ||
short = "q", | ||
long = "quote", | ||
help = "Identifier quote style", | ||
raw(possible_values = r#"&["double", "backquote", "brackets"]"#), | ||
default_value = "double", | ||
parse(from_str = "quote_from_str") | ||
)] | ||
quote: Quote, | ||
} | ||
|
||
fn quote_from_str(s: &str) -> Quote { | ||
match s { | ||
"double" => Quote::Double, | ||
"backquote" => Quote::Backquote, | ||
"brackets" => Quote::Brackets, | ||
_ => unreachable!(), | ||
} | ||
} | ||
|
||
fn seed_from_str(s: &str) -> Result<RngSeed, DecodeError> { | ||
let mut seed = RngSeed::default(); | ||
|
||
if HEXLOWER_PERMISSIVE.decode_len(s.len())? != seed.len() { | ||
return Err(DecodeError { | ||
position: s.len(), | ||
kind: DecodeKind::Length, | ||
}); | ||
} | ||
match HEXLOWER_PERMISSIVE.decode_mut(s.as_bytes(), &mut seed) { | ||
Ok(_) => Ok(seed), | ||
Err(e) => Err(e.error), | ||
} | ||
} | ||
|
||
fn main() { | ||
if let Err(err) = run() { | ||
eprintln!("{}\n", err); | ||
for (e, i) in err.iter_causes().zip(1..) { | ||
eprintln!("{:=^80}\n{}\n", format!(" ERROR CAUSE #{} ", i), e); | ||
} | ||
exit(1); | ||
} | ||
} | ||
|
||
fn run() -> Result<(), Error> { | ||
let args = Args::from_args(); | ||
let input = read_to_string(&args.template).context("failed to read template")?; | ||
let template = Template::parse(&input)?; | ||
|
||
let generator = Generator::new(template, args.quote, &args.table_name); | ||
|
||
create_dir_all(&args.out_dir).context("failed to create output directory")?; | ||
let schema_path = args | ||
.out_dir | ||
.join(format!("{}.{}-schema.sql", args.schema_name, args.table_name)); | ||
let schema_file = File::create(&schema_path) | ||
.with_context(|_| format!("failed to create schema file at {}", schema_path.display()))?; | ||
let schema_file = BufWriter::new(schema_file); | ||
generator | ||
.write_sql_schema(schema_file) | ||
.with_context(|_| format!("failed to write to schema file at {}", schema_path.display()))?; | ||
|
||
let seed = args.seed.unwrap_or_else(|| EntropyRng::new().gen::<RngSeed>()); | ||
eprintln!("Using seed: {}", HEXLOWER_PERMISSIVE.encode(&seed)); | ||
let mut compiled = generator.compile(seed)?; | ||
|
||
let num_digits = args.files_count.to_string().len(); | ||
for file_index in 1..=args.files_count { | ||
let data_path = args.out_dir.join(format!( | ||
"{0}.{1}.{2:03$}.sql", | ||
args.schema_name, args.table_name, file_index, num_digits | ||
)); | ||
let data_file = | ||
File::create(&data_path).with_context(|_| format!("failed to data file at {}", data_path.display()))?; | ||
let mut data_file = BufWriter::new(data_file); | ||
for _ in 0..args.inserts_count { | ||
compiled | ||
.write_sql(&mut data_file, args.rows_count) | ||
.with_context(|_| format!("failed to write to data file at {}", data_path.display()))?; | ||
} | ||
} | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
use failure::{Backtrace, Context, Fail}; | ||
use std::fmt; | ||
|
||
use crate::parser::Function; | ||
|
||
#[derive(Fail, Debug, Clone, PartialEq, Eq)] | ||
//#[non_exhaustive] | ||
pub enum ErrorKind { | ||
#[fail(display = "failed to parse template")] | ||
ParseTemplate, | ||
|
||
#[fail(display = "unknown function '{}'", 0)] | ||
UnknownFunction(String), | ||
|
||
#[fail(display = "integer '{}' is too big", 0)] | ||
IntegerOverflow(String), | ||
|
||
#[fail(display = "not enough arguments to function {}", 0)] | ||
NotEnoughArguments(Function), | ||
|
||
#[fail(display = "invalid regex {}", 0)] | ||
InvalidRegex(String), | ||
|
||
#[fail(display = "unknown regex flag {}", 0)] | ||
UnknownRegexFlag(char), | ||
|
||
#[fail(display = "unsupported regex element: '{}'", 0)] | ||
UnsupportedRegexElement(String), | ||
|
||
#[fail( | ||
display = "invalid argument type: in function {}, argument #{} should be a {}", | ||
name, | ||
index, | ||
expected | ||
)] | ||
InvalidArgumentType { | ||
name: Function, | ||
index: usize, | ||
expected: &'static str, | ||
}, | ||
|
||
#[fail(display = "failed to write SQL schema")] | ||
WriteSqlSchema, | ||
|
||
#[fail(display = "failed to write SQL data")] | ||
WriteSqlData, | ||
|
||
#[fail(display = "failed to write SQL value")] | ||
WriteSqlValue, | ||
|
||
#[doc(hidden)] | ||
#[fail(display = "(placeholder)")] | ||
__NonExhaustive, | ||
} | ||
|
||
#[derive(Debug)] | ||
pub struct Error { | ||
inner: Context<ErrorKind>, | ||
} | ||
|
||
impl Fail for Error { | ||
fn cause(&self) -> Option<&dyn Fail> { | ||
self.inner.cause() | ||
} | ||
|
||
fn backtrace(&self) -> Option<&Backtrace> { | ||
self.inner.backtrace() | ||
} | ||
} | ||
|
||
impl fmt::Display for Error { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
self.inner.fmt(f) | ||
} | ||
} | ||
|
||
impl Error { | ||
pub fn kind(&self) -> &ErrorKind { | ||
self.inner.get_context() | ||
} | ||
} | ||
|
||
impl From<ErrorKind> for Error { | ||
fn from(kind: ErrorKind) -> Self { | ||
Self { | ||
inner: Context::new(kind), | ||
} | ||
} | ||
} | ||
|
||
impl From<Context<ErrorKind>> for Error { | ||
fn from(inner: Context<ErrorKind>) -> Self { | ||
Self { inner } | ||
} | ||
} |
Oops, something went wrong.