-
Notifications
You must be signed in to change notification settings - Fork 74
/
flatten.rs
100 lines (87 loc) · 3.63 KB
/
flatten.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// Usage/help text for the `flatten` command.
//
// `run` passes this string, together with the raw argv, to `util::get_args`
// to populate `Args`. NOTE(review): presumably this is parsed docopt-style, so
// the option names, `<arg>` placeholders and `[default: ...]` markers are
// likely significant to argument parsing, not just to `--help` output --
// confirm before rewording or re-flowing any of it.
static USAGE: &str = r#"
Prints flattened records such that fields are labeled separated by a new line.
This mode is particularly useful for viewing one record at a time. Each
record is separated by a special '#' character (on a line by itself), which
can be changed with the --separator flag.
There is also a condensed view (-c or --condense) that will shorten the
contents of each field to provide a summary view.
For examples, see https://github.com/dathere/qsv/blob/master/tests/test_flatten.rs.
Usage:
qsv flatten [options] [<input>]
qsv flatten --help
flatten options:
-c, --condense <arg> Limits the length of each field to the value
specified. If the field is UTF-8 encoded, then
<arg> refers to the number of code points.
Otherwise, it refers to the number of bytes.
-f, --field-separator <arg> A string of character to write between a column name
and its value.
-s, --separator <arg> A string of characters to write after each record.
When non-empty, a new line is automatically
appended to the separator.
[default: #]
Common options:
-h, --help Display this message
-n, --no-headers When set, the first row will not be interpreted
as headers. When set, the name of each field
will be its index.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
"#;
use std::{
borrow::Cow,
io::{self, BufWriter, Write},
};
use serde::Deserialize;
use tabwriter::TabWriter;
use crate::{
config::{Config, Delimiter, DEFAULT_WTR_BUFFER_CAPACITY},
util, CliResult,
};
/// Command-line arguments for `flatten`, deserialized by `util::get_args`
/// from `USAGE` and argv. Field names mirror the positional/flag names in the
/// usage text (`arg_` / `flag_` prefixes), so they must not be renamed.
#[derive(Deserialize)]
struct Args {
/// Optional `<input>` positional; handed to `Config::new` as the CSV source.
arg_input: Option<String>,
/// `--condense`: optional length limit passed to `util::condense` for
/// every field value. `None` when the flag is absent.
flag_condense: Option<usize>,
/// `--field-separator`: optional string written after the tab that follows
/// each column label and before the field value.
flag_field_separator: Option<String>,
/// `--separator`: line written between consecutive records. An empty
/// string disables the separator line.
flag_separator: String,
/// `--no-headers`: forwarded to the reader config; when set, each field is
/// labeled with its zero-based index instead of a header name.
flag_no_headers: bool,
/// `--delimiter`: optional field delimiter forwarded to the reader config.
flag_delimiter: Option<Delimiter>,
}
/// Entry point for the `flatten` command.
///
/// Reads CSV records from the configured input and writes every field on its
/// own line as `<label>\t[<field-separator>]<value>` to a buffered, locked
/// stdout wrapped in a `TabWriter`. The label is the header name, or the
/// zero-based field index when `--no-headers` is set. Values are passed
/// through `util::condense` with the `--condense` limit.
///
/// A line containing the `--separator` string is written *between* records
/// (not before the first one, and not after the last one); an empty separator
/// suppresses that line altogether.
///
/// # Errors
///
/// Propagates failures from argument parsing, opening/reading the CSV input,
/// and writing to or flushing stdout.
pub fn run(argv: &[&str]) -> CliResult<()> {
    let args: Args = util::get_args(USAGE, argv)?;

    let rconfig = Config::new(args.arg_input.as_ref())
        .delimiter(args.flag_delimiter)
        .no_headers(args.flag_no_headers);
    let mut rdr = rconfig.reader()?;
    let headers = rdr.byte_headers()?.clone();

    // Lock stdout once and buffer it; the TabWriter sits on top of the buffer.
    let mut wtr = TabWriter::new(BufWriter::with_capacity(
        DEFAULT_WTR_BUFFER_CAPACITY,
        io::stdout().lock(),
    ));

    // Loop invariants, resolved up front.
    let condense_limit = args.flag_condense;
    let label_by_index = rconfig.no_headers;
    // `None` means "do not emit a separator line" (the flag was empty).
    let record_separator = Some(args.flag_separator).filter(|sep| !sep.is_empty());
    // `None` means "no --field-separator given": nothing extra is written.
    let value_prefix: Option<Vec<u8>> = args.flag_field_separator.map(String::into_bytes);

    let mut row = csv::ByteRecord::new();
    let mut is_first_record = true;
    while rdr.read_byte_record(&mut row)? {
        // The separator goes between records, so skip it ahead of the first.
        if is_first_record {
            is_first_record = false;
        } else if let Some(separator) = &record_separator {
            writeln!(wtr, "{separator}")?;
        }

        // Pairing with `headers` caps the output at the header count.
        for (i, (header, field)) in headers.iter().zip(row.iter()).enumerate() {
            if label_by_index {
                write!(wtr, "{i}")?;
            } else {
                wtr.write_all(header)?;
            }
            wtr.write_all(b"\t")?;

            if let Some(prefix) = &value_prefix {
                wtr.write_all(prefix)?;
            }

            let value = util::condense(Cow::Borrowed(field), condense_limit);
            wtr.write_all(&value)?;
            wtr.write_all(b"\n")?;
        }
    }

    Ok(wtr.flush()?)
}