Skip to content

Commit

Permalink
Initial version
Browse files Browse the repository at this point in the history
  • Loading branch information
kennytm committed Oct 5, 2018
1 parent 0e91190 commit 9229309
Show file tree
Hide file tree
Showing 13 changed files with 1,186 additions and 0 deletions.
17 changes: 17 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[package]
name = "dbgen"
version = "0.1.0"
authors = ["kennytm <[email protected]>"]
edition = "2018"

description = "Generate random test cases for databases"

[dependencies]
structopt = "0.2"
clap = "2.32"
pest = "2.0"
pest_derive = "2.0"
failure = "0.1"
rand = "0.5"
data-encoding = "2.1"
regex-syntax = "0.6"
87 changes: 87 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
Template reference
==================

File syntax
-----------

The template file should consist of one CREATE TABLE statement and one INSERT statement, like this:

```sql
CREATE TABLE _ (
column_1 COLUMN_TYPE_1,
-- ...
column_n COLUMN_TYPE_N
) OPTION_1 = 1, /*...*/ OPTION_N = N;

INSERT INTO _ VALUES (1, 2, /*...*/ 'N');
```

The table's name must be `_` (an unquoted underscore). This will be substituted with the real name
when written as the real data. The INSERT statement should not list the column names.

Expression syntax
-----------------

Each value in the INSERT statement can be an expression. `dbgen` will evaluate the expression to
generate a new row when writing them out.

### Literals

`dbgen` supports integer, float and string literals.

* **Integers**

Decimal and hexadecimal numbers are supported. The value must be between 0 and
2<sup>64</sup> − 1.

Examples: `0`, `3`, `18446744073709551615`, `0X1234abcd`, `0xFFFFFFFFFFFFFFFF`

* **Floating point numbers**

Numbers will be stored in IEEE-754 double-precision format.

Examples: `0.0`, `1.5`, `.5`, `2.`, `1e100`, `1.38e-23`, `6.02e+23`

* **Strings**

Strings must be encoded as UTF-8, and written between single quotes (double-quoted strings are
*not* supported). To represent a single quote in the string, use `''`.

Examples: `'Hello'`, `'10 o''clock'`

### Symbols

* **rownum**

The current row number. The first row has value 1.

### Random functions

* **rand.int(32)**

Generates a uniform random signed integer with the given number of bits (must be between 1 and
64).

* **rand.uint(32)**

Generates a uniform random unsigned integer with the given number of bits (must be between 1 and
64).

* **rand.regex('[0-9a-z]+', 'i', 100)**

Generates a random string satisfying the regular expression. The second and third parameters are
optional. If provided, they specify respectively the regex flags, and maximum repeat count for
the unbounded repetition operators (`+`, `*` and `{n,}`).

The input string should satisfy the syntax of the Rust regex package. The flags argument is a
string composed of these letters:

* `x` (ignore whitespace)
* `i` (case insensitive)
* `s` (dot matches new-line)
* `u` (enable Unicode mode)
* `a` (disable Unicode mode)
* `o` (recognize octal escapes)

The flags `m` (multi-line) and `U` (ungreedy) do not affect string generation and are ignored.

6 changes: 6 additions & 0 deletions res/sysbench_bulk_insert.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
CREATE TABLE _ (
id INTEGER NOT NULL,
k INTEGER DEFAULT '0' NOT NULL,
PRIMARY KEY (id)
);
INSERT INTO _ VALUES (rownum, rownum);
14 changes: 14 additions & 0 deletions res/sysbench_oltp_uniform.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
CREATE TABLE _(
id INTEGER NOT NULL AUTO_INCREMENT,
k INTEGER DEFAULT '0' NOT NULL,
c CHAR(120) DEFAULT '' NOT NULL,
pad CHAR(60) DEFAULT '' NOT NULL,
PRIMARY KEY(id),
INDEX KEY(k)
);
INSERT INTO _ VALUES (
rownum,
rand.int(32),
rand.regex('([0-9]{11}-){9}[0-9]{11}'),
rand.regex('([0-9]{11}-){4}[0-9]{11}')
);
2 changes: 2 additions & 0 deletions rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
edition = "2018"
max_width = 120
152 changes: 152 additions & 0 deletions src/bin/dbgen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
use dbgen::{quote::Quote, Generator, RngSeed, Template};

use data_encoding::{DecodeError, DecodeKind, HEXLOWER_PERMISSIVE};
use failure::{Error, ResultExt};
use rand::{EntropyRng, Rng};
use structopt::StructOpt;

use std::fs::{create_dir_all, read_to_string, File};
use std::io::BufWriter;
use std::path::PathBuf;
use std::process::exit;

// Command-line arguments of the `dbgen` binary, parsed through `structopt`.
//
// Plain `//` comments are used here on purpose: structopt derives help text
// from `///` doc comments, which could interfere with the explicit
// `help = "..."` attributes below.
#[derive(StructOpt, Debug)]
struct Args {
// Schema name; `run` uses it as the first component of every output file name.
#[structopt(short = "d", long = "schema-name", help = "Schema name")]
schema_name: String,

// Table name; passed to `Generator::new` and used in the output file names.
#[structopt(short = "t", long = "table-name", help = "Table name")]
table_name: String,

// Directory that receives the schema file and the data files.
// `run` creates it (and any missing parents) before writing.
#[structopt(
short = "o",
long = "out-dir",
help = "Output directory",
parse(from_os_str)
)]
out_dir: PathBuf,

// How many data files to produce; defaults to 1.
#[structopt(
short = "k",
long = "files-count",
help = "Number of files to generate",
default_value = "1"
)]
files_count: u32,

// How many INSERT statements go into each data file. No default is given.
#[structopt(
short = "n",
long = "inserts-count",
help = "Number of INSERT statements per file"
)]
inserts_count: u32,

// How many rows each INSERT statement carries; defaults to 1.
#[structopt(
short = "r",
long = "rows-count",
help = "Number of rows per INSERT statement",
default_value = "1"
)]
rows_count: u32,

// Path of the template SQL file; `run` reads it fully into memory.
#[structopt(
short = "i",
long = "template",
help = "Generation template SQL",
parse(from_os_str)
)]
template: PathBuf,

// Optional RNG seed, decoded from hex by `seed_from_str`.
// When absent, `run` draws a fresh seed from `EntropyRng` and prints it.
#[structopt(
short = "s",
long = "seed",
help = "Random number generator seed (should have 64 hex digits)",
parse(try_from_str = "seed_from_str")
)]
seed: Option<RngSeed>,

// Identifier quoting style. `possible_values` restricts the accepted strings
// to exactly the ones `quote_from_str` knows how to convert.
#[structopt(
short = "q",
long = "quote",
help = "Identifier quote style",
raw(possible_values = r#"&["double", "backquote", "brackets"]"#),
default_value = "double",
parse(from_str = "quote_from_str")
)]
quote: Quote,
}

/// Converts the value of the `--quote` option into a [`Quote`] style.
///
/// # Panics
///
/// Panics on any string other than `"double"`, `"backquote"` or `"brackets"`.
/// The `possible_values` list on `Args::quote` names exactly these three strings.
fn quote_from_str(s: &str) -> Quote {
    if s == "double" {
        return Quote::Double;
    }
    if s == "backquote" {
        return Quote::Backquote;
    }
    if s == "brackets" {
        return Quote::Brackets;
    }
    unreachable!()
}

/// Decodes a hexadecimal string into an [`RngSeed`].
///
/// # Errors
///
/// Returns a [`DecodeError`] when
/// * the input length is not valid for hex decoding,
/// * the decoded length differs from the seed length (reported as
///   `DecodeKind::Length` positioned at the end of the input), or
/// * the input contains a character that cannot be decoded.
fn seed_from_str(s: &str) -> Result<RngSeed, DecodeError> {
    let mut seed = RngSeed::default();

    // Reject inputs of the wrong size before decoding anything.
    let decoded_len = HEXLOWER_PERMISSIVE.decode_len(s.len())?;
    let expected_len = seed.len();
    if decoded_len != expected_len {
        let length_error = DecodeError {
            position: s.len(),
            kind: DecodeKind::Length,
        };
        return Err(length_error);
    }

    // Decode in place; a partial-decode failure is reduced to its underlying error.
    if let Err(partial) = HEXLOWER_PERMISSIVE.decode_mut(s.as_bytes(), &mut seed) {
        return Err(partial.error);
    }
    Ok(seed)
}

/// Program entry point.
///
/// Delegates to `run`. On failure it prints the error followed by each of its
/// causes (numbered from 1, each under an 80-column `=` banner) to standard
/// error, then exits with status 1.
fn main() {
    let err = match run() {
        Ok(()) => return,
        Err(err) => err,
    };

    eprintln!("{}\n", err);
    for (index, cause) in err.iter_causes().enumerate() {
        let banner = format!(" ERROR CAUSE #{} ", index + 1);
        eprintln!("{:=^80}\n{}\n", banner, cause);
    }
    exit(1);
}

fn run() -> Result<(), Error> {
let args = Args::from_args();
let input = read_to_string(&args.template).context("failed to read template")?;
let template = Template::parse(&input)?;

let generator = Generator::new(template, args.quote, &args.table_name);

create_dir_all(&args.out_dir).context("failed to create output directory")?;
let schema_path = args
.out_dir
.join(format!("{}.{}-schema.sql", args.schema_name, args.table_name));
let schema_file = File::create(&schema_path)
.with_context(|_| format!("failed to create schema file at {}", schema_path.display()))?;
let schema_file = BufWriter::new(schema_file);
generator
.write_sql_schema(schema_file)
.with_context(|_| format!("failed to write to schema file at {}", schema_path.display()))?;

let seed = args.seed.unwrap_or_else(|| EntropyRng::new().gen::<RngSeed>());
eprintln!("Using seed: {}", HEXLOWER_PERMISSIVE.encode(&seed));
let mut compiled = generator.compile(seed)?;

let num_digits = args.files_count.to_string().len();
for file_index in 1..=args.files_count {
let data_path = args.out_dir.join(format!(
"{0}.{1}.{2:03$}.sql",
args.schema_name, args.table_name, file_index, num_digits
));
let data_file =
File::create(&data_path).with_context(|_| format!("failed to data file at {}", data_path.display()))?;
let mut data_file = BufWriter::new(data_file);
for _ in 0..args.inserts_count {
compiled
.write_sql(&mut data_file, args.rows_count)
.with_context(|_| format!("failed to write to data file at {}", data_path.display()))?;
}
}

Ok(())
}
95 changes: 95 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
use failure::{Backtrace, Context, Fail};
use std::fmt;

use crate::parser::Function;

/// The kinds of error this crate reports.
///
/// Each variant's user-facing message is given by its `#[fail(display = ...)]`
/// attribute.
#[derive(Fail, Debug, Clone, PartialEq, Eq)]
// NOTE(review): presumably left commented out because the attribute was not
// usable when this was written; the hidden `__NonExhaustive` variant below
// appears to stand in for it — confirm.
//#[non_exhaustive]
pub enum ErrorKind {
/// The template could not be parsed.
#[fail(display = "failed to parse template")]
ParseTemplate,

/// The template refers to a function that is not known; carries the name shown in the message.
#[fail(display = "unknown function '{}'", 0)]
UnknownFunction(String),

/// An integer is too big; carries the text shown in the message.
#[fail(display = "integer '{}' is too big", 0)]
IntegerOverflow(String),

/// A function was given too few arguments.
#[fail(display = "not enough arguments to function {}", 0)]
NotEnoughArguments(Function),

/// A regular expression is invalid; carries the text shown in the message.
#[fail(display = "invalid regex {}", 0)]
InvalidRegex(String),

/// A regex flag character was not recognized.
#[fail(display = "unknown regex flag {}", 0)]
UnknownRegexFlag(char),

/// A regular expression uses an element that is not supported.
#[fail(display = "unsupported regex element: '{}'", 0)]
UnsupportedRegexElement(String),

/// An argument of a function has the wrong type.
#[fail(
display = "invalid argument type: in function {}, argument #{} should be a {}",
name,
index,
expected
)]
InvalidArgumentType {
/// The function that received the argument.
name: Function,
/// The argument number, printed after `#` in the message.
index: usize,
/// Description of the expected type, printed at the end of the message.
expected: &'static str,
},

/// Writing the SQL schema failed.
#[fail(display = "failed to write SQL schema")]
WriteSqlSchema,

/// Writing the SQL data failed.
#[fail(display = "failed to write SQL data")]
WriteSqlData,

/// Writing a SQL value failed.
#[fail(display = "failed to write SQL value")]
WriteSqlValue,

// Hidden placeholder variant; not meant to be matched on by name.
#[doc(hidden)]
#[fail(display = "(placeholder)")]
__NonExhaustive,
}

/// Error type of this crate: an [`ErrorKind`] wrapped in a `failure::Context`.
///
/// The kind is available through `Error::kind`; the `Fail` implementation
/// forwards the cause and backtrace of the wrapped context.
#[derive(Debug)]
pub struct Error {
// The wrapped context; every trait impl on `Error` delegates to it.
inner: Context<ErrorKind>,
}

impl Fail for Error {
fn cause(&self) -> Option<&dyn Fail> {
self.inner.cause()
}

fn backtrace(&self) -> Option<&Backtrace> {
self.inner.backtrace()
}
}

impl fmt::Display for Error {
    /// Formats the error using the `Display` output of the wrapped context.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Name the trait explicitly: `Context` implements both `Debug` and
        // `Display`, and neither trait is imported by name in this file, so an
        // unqualified `self.inner.fmt(f)` depends on what happens to be in scope.
        fmt::Display::fmt(&self.inner, f)
    }
}

impl Error {
pub fn kind(&self) -> &ErrorKind {
self.inner.get_context()
}
}

impl From<ErrorKind> for Error {
fn from(kind: ErrorKind) -> Self {
Self {
inner: Context::new(kind),
}
}
}

impl From<Context<ErrorKind>> for Error {
fn from(inner: Context<ErrorKind>) -> Self {
Self { inner }
}
}
Loading

0 comments on commit 9229309

Please sign in to comment.