//! build.zig (forked from ziglang/gotta-go-fast)
const std = @import("std");
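
/// Two modes of operation (inferred from the argument handling below):
///
///   zig build -Dbackfill -- <records.csv> <zig-git-dir> <commits.txt>
///     check out, build, and benchmark every commit listed in commits.txt;
///     expects a configured CMake/ninja build in <zig-git-dir>/build-backfill
///
///   zig build -- <records.csv> <zig-exe> <commit-sha> <commit-timestamp>
///     benchmark an already-built Zig executable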
pub fn build(b: *std.Build) !void {
    const backfill = b.option(bool, "backfill", "Backfill data for previous Zig versions") orelse false;
    if (backfill) {
        // Parse arguments
        const args = b.args.?;
        const records_csv_path = args[0];
        const zig_git_dir_path = args[1];
        const commits_txt_path = args[2];
        // Load commits
        var commit_raw_iter = std.mem.tokenize(u8, try std.fs.cwd().readFileAlloc(b.allocator, commits_txt_path, 2 * 1024 * 1024), "\r\n");
        // Create paths
        const zig_build_dir_path = try std.fs.path.join(b.allocator, &.{ zig_git_dir_path, "build-backfill" });
        const zig_exe_path = try std.fs.path.join(b.allocator, &.{ zig_build_dir_path, "stage3/bin/zig" });
        // Run benchmarks for each commit
        while (commit_raw_iter.next()) |commit_raw| {
            // Check out commit
            std.debug.print("Checking out {s} to backfill...\n", .{commit_raw});
            _ = try std.ChildProcess.exec(.{ .allocator = b.allocator, .argv = &.{ "git", "checkout", commit_raw }, .cwd = zig_git_dir_path });
            // Touch CMakeLists.txt to pick up the new Zig version
            _ = try std.ChildProcess.exec(.{ .allocator = b.allocator, .argv = &.{ "touch", "CMakeLists.txt" }, .cwd = zig_git_dir_path });
            // Build Zig
            std.debug.print("Building Zig to {s}...\n", .{zig_exe_path});
            _ = try std.ChildProcess.exec(.{ .allocator = b.allocator, .argv = &.{"ninja"}, .cwd = zig_build_dir_path });
            // Parse Zig version
            const zig_version = std.mem.trimRight(u8, b.exec(&.{ zig_exe_path, "version" }), "\r\n");
            // Parse commit
            const commit = try parseCommit(commit_raw);
            // Parse commit timestamp
            const commit_timestamp_result = try std.ChildProcess.exec(.{ .allocator = b.allocator, .argv = &.{ "git", "log", "-n1", commit_raw, "--pretty=format:%at" }, .cwd = zig_git_dir_path });
            const commit_timestamp_raw = commit_timestamp_result.stdout;
            const commit_timestamp = try std.fmt.parseInt(u64, std.mem.trimRight(u8, commit_timestamp_raw, "\r\n"), 10);
            // Collect measurements
            try collectMeasurements(b, records_csv_path, zig_exe_path, zig_version, commit, commit_timestamp);
        }
    } else {
        // Parse arguments
        const args = b.args.?;
        const records_csv_path = args[0];
        const zig_exe_path = args[1];
        const zig_version = std.mem.trimRight(u8, b.exec(&.{ zig_exe_path, "version" }), "\r\n");
        const commit = try parseCommit(args[2]);
        const commit_timestamp = try std.fmt.parseInt(u64, std.mem.trimRight(u8, args[3], "\r\n"), 10);
        // Collect measurements
        try collectMeasurements(b, records_csv_path, zig_exe_path, zig_version, commit, commit_timestamp);
    }
}
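
// One Record per (commit, benchmark) pair, stored as a row of the CSV.
// CommitTable maps a Record.Key (commit hash + benchmark name) to that row's
// index in the RecordList, so re-running a benchmark for the same commit
// overwrites its earlier row instead of appending a duplicate.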
const RecordList = std.ArrayList(Record);
const CommitTable = std.HashMap(Record.Key, usize, CommitTableContext, std.hash_map.default_max_load_percentage);

const CommitTableContext = struct {
    pub fn eql(_: CommitTableContext, a: Record.Key, b: Record.Key) bool {
        return a.eql(b);
    }

    pub fn hash(_: CommitTableContext, key: Record.Key) u64 {
        var hasher = std.hash.Wyhash.init(0);
        std.hash.autoHashStrat(&hasher, key, .Deep);
        return hasher.final();
    }
};

const Record = struct {
    timestamp: u64,
    benchmark_name: []const u8,
    commit_hash: [20]u8,
    commit_timestamp: u64,
    zig_version: []const u8,
    error_message: []const u8 = &[0]u8{},
    samples_taken: u64 = 0,
    wall_time_median: u64 = 0,
    wall_time_mean: u64 = 0,
    wall_time_min: u64 = 0,
    wall_time_max: u64 = 0,
    utime_median: u64 = 0,
    utime_mean: u64 = 0,
    utime_min: u64 = 0,
    utime_max: u64 = 0,
    stime_median: u64 = 0,
    stime_mean: u64 = 0,
    stime_min: u64 = 0,
    stime_max: u64 = 0,
    cpu_cycles_median: u64 = 0,
    cpu_cycles_mean: u64 = 0,
    cpu_cycles_min: u64 = 0,
    cpu_cycles_max: u64 = 0,
    instructions_median: u64 = 0,
    instructions_mean: u64 = 0,
    instructions_min: u64 = 0,
    instructions_max: u64 = 0,
    cache_references_median: u64 = 0,
    cache_references_mean: u64 = 0,
    cache_references_min: u64 = 0,
    cache_references_max: u64 = 0,
    cache_misses_median: u64 = 0,
    cache_misses_mean: u64 = 0,
    cache_misses_min: u64 = 0,
    cache_misses_max: u64 = 0,
    branch_misses_median: u64 = 0,
    branch_misses_mean: u64 = 0,
    branch_misses_min: u64 = 0,
    branch_misses_max: u64 = 0,
    maxrss: u64 = 0,

    const Key = struct {
        commit_hash: [20]u8,
        benchmark_name: []const u8,

        fn eql(self: Key, other: Key) bool {
            return std.mem.eql(u8, &self.commit_hash, &other.commit_hash) and
                std.mem.eql(u8, self.benchmark_name, other.benchmark_name);
        }
    };
};
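
// For every benchmark in manifest.json: compile src/bench.zig (with the
// benchmark's main file as its "app" module) using the Zig under test, run the
// resulting ./bench binary, parse its JSON output into a Record, and insert or
// overwrite the matching row before rewriting the CSV.
//
// manifest.json maps each benchmark name to its source location, e.g.
// (illustrative):
//   { "example-bench": { "dir": "example-bench", "mainPath": "main.zig" } }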
fn collectMeasurements(b: *std.Build, records_csv_path: []const u8, zig_exe_path: []const u8, zig_version: []const u8, commit: [20]u8, commit_timestamp: u64) !void {
    std.debug.print("Collecting measurements for Zig version {s} commit timestamp {d}...\n", .{ zig_version, commit_timestamp });
    // Parse manifest
    const manifest = try std.json.parseFromSlice(std.json.Value, b.allocator, @embedFile("manifest.json"), .{});
    defer manifest.deinit();
    // Load records
    var record_list = RecordList.init(b.allocator);
    defer record_list.deinit();
    var commit_table = CommitTable.init(b.allocator);
    defer commit_table.deinit();
    try loadCsv(b.allocator, records_csv_path, &record_list, &commit_table);
    try record_list.ensureUnusedCapacity(manifest.value.object.count() * 2);
    const timestamp: u64 = @intCast(std.time.timestamp());
    // Run benchmarks
    var benchmark_iter = manifest.value.object.iterator();
    while (benchmark_iter.next()) |benchmark| {
        // Parse benchmark
        const name = benchmark.key_ptr.*;
        const dir_name = benchmark.value_ptr.object.get("dir").?.string;
        const main_basename = benchmark.value_ptr.object.get("mainPath").?.string;
        const main_path = try std.fs.path.join(b.allocator, &.{ "src", dir_name, main_basename });
        // Build benchmark
        const mod = try std.fmt.allocPrint(b.allocator, "app::{s}", .{main_path});
        _ = b.exec(&.{ zig_exe_path, "build-exe", "-O", "ReleaseFast", "--deps", "app", "--mod", mod, "src/bench.zig" });
        // Run benchmark
        std.debug.print("Running '{s}' for {}...\n", .{ name, std.fmt.fmtSliceHexLower(&commit) });
        const bench_output = b.exec(&.{ "./bench", zig_exe_path });
        // Parse output
        const bench_json = try std.json.parseFromSlice(std.json.Value, b.allocator, bench_output, .{});
        defer bench_json.deinit();
        const record = try jsonToRecord(b.allocator, bench_json.value, timestamp, name, commit, zig_version, commit_timestamp);
        const key = Record.Key{ .commit_hash = record.commit_hash, .benchmark_name = record.benchmark_name };
        // Save record
        const main_gop = try commit_table.getOrPut(key);
        if (main_gop.found_existing) {
            record_list.items[main_gop.value_ptr.*] = record;
        } else {
            main_gop.value_ptr.* = record_list.items.len;
            record_list.appendAssumeCapacity(record);
        }
    }
    // Save records
    try saveCsv(b.allocator, records_csv_path, record_list.items);
}
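
// Reads the existing CSV into record_list. The header row determines the
// column order and must name every Record field; duplicate (commit, benchmark)
// rows keep the first occurrence.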
fn loadCsv(allocator: std.mem.Allocator, records_csv_path: []const u8, record_list: *RecordList, commit_table: *CommitTable) !void {
    const csv_text = try std.fs.cwd().readFileAlloc(allocator, records_csv_path, 2 * 1024 * 1024 * 1024);
    defer allocator.free(csv_text);
    var field_indexes: [@typeInfo(Record).Struct.fields.len]usize = undefined;
    var seen_fields = [1]bool{false} ** field_indexes.len;
    var line_it = std.mem.split(u8, csv_text, "\n");
    {
        const first_line = line_it.next() orelse {
            std.debug.print("empty CSV file\n", .{});
            std.process.exit(1);
        };
        var csv_index: usize = 0;
        var it = std.mem.split(u8, first_line, ",");
        while (it.next()) |field_name| : (csv_index += 1) {
            if (csv_index >= field_indexes.len) {
                std.debug.print("extra CSV field: {s}\n", .{field_name});
                std.process.exit(1);
            }
            const field_index = fieldIndex(Record, field_name) orelse {
                std.debug.print("bad CSV field name: {s}\n", .{field_name});
                std.process.exit(1);
            };
            field_indexes[csv_index] = field_index;
            seen_fields[field_index] = true;
        }
        inline for (@typeInfo(Record).Struct.fields, 0..) |field, i| {
            if (!seen_fields[i]) {
                std.debug.print("missing CSV field: {s}\n", .{field.name});
                std.process.exit(1);
            }
        }
    }
    var line_index: usize = 1;
    while (line_it.next()) |line| : (line_index += 1) {
        if (std.mem.eql(u8, line, "")) continue;
        var it = std.mem.split(u8, line, ",");
        var csv_index: usize = 0;
        const record_index = record_list.items.len;
        const record = try record_list.addOne();
        while (it.next()) |field| : (csv_index += 1) {
            if (csv_index >= field_indexes.len) {
                std.debug.print("extra CSV field on line {d}\n", .{line_index + 1});
                std.process.exit(1);
            }
            setRecordField(allocator, record, field, field_indexes[csv_index]);
        }
        if (csv_index != field_indexes.len) {
            std.debug.print("CSV line {d} missing a field\n", .{line_index + 1});
            std.process.exit(1);
        }
        const key: Record.Key = .{
            .commit_hash = record.commit_hash,
            .benchmark_name = record.benchmark_name,
        };
        if (try commit_table.fetchPut(key, record_index)) |existing| {
            _ = commit_table.putAssumeCapacity(key, existing.value);
            record_list.shrinkRetainingCapacity(record_list.items.len - 1);
        }
    }
}
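
// Rewrites the CSV: a header row of Record field names followed by one line
// per record. BufferedAtomicFile writes to a temporary file and renames it on
// finish, so an interrupted run leaves the previous CSV intact.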
fn saveCsv(allocator: std.mem.Allocator, records_csv_path: []const u8, records: []Record) !void {
    const baf = try std.io.BufferedAtomicFile.create(allocator, std.fs.cwd(), records_csv_path, .{});
    defer baf.destroy();
    const out = baf.writer();
    inline for (@typeInfo(Record).Struct.fields, 0..) |field, i| {
        if (i != 0) {
            try out.writeAll(",");
        }
        try out.writeAll(field.name);
    }
    try out.writeAll("\n");
    for (records) |record| {
        try writeCsvRecord(out, record);
        try out.writeAll("\n");
    }
    try baf.finish();
}
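
// The bench harness prints either a bare JSON string (treated as an error
// message) or an object of the shape sketched below (inferred from the
// parsing that follows; values are illustrative):
//
//   {"ok": {"samples_taken": 10,
//           "wall_time": {"median": 1, "mean": 1, "min": 1, "max": 1},
//           "utime": {...}, "stime": {...}, "cpu_cycles": {...},
//           "instructions": {...}, "cache_references": {...},
//           "cache_misses": {...}, "branch_misses": {...},
//           "maxrss": 123456}}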
fn jsonToRecord(
    allocator: std.mem.Allocator,
    mo: std.json.Value,
    timestamp: u64,
    benchmark_name: []const u8,
    commit_hash: [20]u8,
    zig_version: []const u8,
    commit_timestamp: u64,
) !Record {
    var record: Record = .{
        .timestamp = timestamp,
        .benchmark_name = try allocator.dupe(u8, benchmark_name),
        .commit_hash = commit_hash,
        .commit_timestamp = commit_timestamp,
        .zig_version = zig_version,
    };
    if (mo == .string) {
        record.error_message = try allocator.dupe(u8, mo.string);
    } else {
        const ok = mo.object.get("ok").?.object;
        record.samples_taken = @as(u64, @intCast(ok.get("samples_taken").?.integer));
        record.wall_time_median = @as(u64, @intCast(ok.get("wall_time").?.object.get("median").?.integer));
        record.wall_time_mean = @as(u64, @intCast(ok.get("wall_time").?.object.get("mean").?.integer));
        record.wall_time_min = @as(u64, @intCast(ok.get("wall_time").?.object.get("min").?.integer));
        record.wall_time_max = @as(u64, @intCast(ok.get("wall_time").?.object.get("max").?.integer));
        record.utime_median = @as(u64, @intCast(ok.get("utime").?.object.get("median").?.integer));
        record.utime_mean = @as(u64, @intCast(ok.get("utime").?.object.get("mean").?.integer));
        record.utime_min = @as(u64, @intCast(ok.get("utime").?.object.get("min").?.integer));
        record.utime_max = @as(u64, @intCast(ok.get("utime").?.object.get("max").?.integer));
        record.stime_median = @as(u64, @intCast(ok.get("stime").?.object.get("median").?.integer));
        record.stime_mean = @as(u64, @intCast(ok.get("stime").?.object.get("mean").?.integer));
        record.stime_min = @as(u64, @intCast(ok.get("stime").?.object.get("min").?.integer));
        record.stime_max = @as(u64, @intCast(ok.get("stime").?.object.get("max").?.integer));
        record.cpu_cycles_median = @as(u64, @intCast(ok.get("cpu_cycles").?.object.get("median").?.integer));
        record.cpu_cycles_mean = @as(u64, @intCast(ok.get("cpu_cycles").?.object.get("mean").?.integer));
        record.cpu_cycles_min = @as(u64, @intCast(ok.get("cpu_cycles").?.object.get("min").?.integer));
        record.cpu_cycles_max = @as(u64, @intCast(ok.get("cpu_cycles").?.object.get("max").?.integer));
        record.instructions_median = @as(u64, @intCast(ok.get("instructions").?.object.get("median").?.integer));
        record.instructions_mean = @as(u64, @intCast(ok.get("instructions").?.object.get("mean").?.integer));
        record.instructions_min = @as(u64, @intCast(ok.get("instructions").?.object.get("min").?.integer));
        record.instructions_max = @as(u64, @intCast(ok.get("instructions").?.object.get("max").?.integer));
        record.cache_references_median = @as(u64, @intCast(ok.get("cache_references").?.object.get("median").?.integer));
        record.cache_references_mean = @as(u64, @intCast(ok.get("cache_references").?.object.get("mean").?.integer));
        record.cache_references_min = @as(u64, @intCast(ok.get("cache_references").?.object.get("min").?.integer));
        record.cache_references_max = @as(u64, @intCast(ok.get("cache_references").?.object.get("max").?.integer));
        record.cache_misses_median = @as(u64, @intCast(ok.get("cache_misses").?.object.get("median").?.integer));
        record.cache_misses_mean = @as(u64, @intCast(ok.get("cache_misses").?.object.get("mean").?.integer));
        record.cache_misses_min = @as(u64, @intCast(ok.get("cache_misses").?.object.get("min").?.integer));
        record.cache_misses_max = @as(u64, @intCast(ok.get("cache_misses").?.object.get("max").?.integer));
        record.branch_misses_median = @as(u64, @intCast(ok.get("branch_misses").?.object.get("median").?.integer));
        record.branch_misses_mean = @as(u64, @intCast(ok.get("branch_misses").?.object.get("mean").?.integer));
        record.branch_misses_min = @as(u64, @intCast(ok.get("branch_misses").?.object.get("min").?.integer));
        record.branch_misses_max = @as(u64, @intCast(ok.get("branch_misses").?.object.get("max").?.integer));
        record.maxrss = @as(u64, @intCast(ok.get("maxrss").?.integer));
    }
    return record;
}
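
// CSV (de)serialization is driven by comptime reflection over Record's fields:
// fieldIndex maps a header name to a field index, setRecordField/setRecordFieldT
// parse a cell into that field's type, and writeCsvRecord/writeCsvRecordField
// do the reverse.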
fn fieldIndex(comptime T: type, name: []const u8) ?usize {
    inline for (@typeInfo(T).Struct.fields, 0..) |field, i| {
        if (std.mem.eql(u8, field.name, name))
            return i;
    }
    return null;
}

fn setRecordField(allocator: std.mem.Allocator, record: *Record, data: []const u8, index: usize) void {
    inline for (@typeInfo(Record).Struct.fields, 0..) |field, i| {
        if (i == index) {
            setRecordFieldT(allocator, field.type, &@field(record, field.name), data);
            return;
        }
    }
    unreachable;
}

fn setRecordFieldT(allocator: std.mem.Allocator, comptime T: type, ptr: *T, data: []const u8) void {
    if (@typeInfo(T) == .Enum) {
        ptr.* = std.meta.stringToEnum(T, data) orelse {
            std.debug.print("bad enum value: {s}\n", .{data});
            std.process.exit(1);
        };
        return;
    }
    switch (T) {
        u64 => {
            ptr.* = std.fmt.parseInt(u64, data, 10) catch |err| {
                std.debug.print("bad u64 value '{s}': {s}\n", .{ data, @errorName(err) });
                std.process.exit(1);
            };
        },
        []const u8 => {
            ptr.* = allocator.dupe(u8, data) catch @panic("out of memory");
        },
        [20]u8 => {
            ptr.* = parseCommit(data) catch |err| {
                std.debug.print("wrong format for commit hash '{s}': {s}\n", .{ data, @errorName(err) });
                std.process.exit(1);
            };
        },
        else => @compileError("no deserialization for " ++ @typeName(T)),
    }
}

fn writeCsvRecord(out: anytype, record: Record) !void {
    inline for (@typeInfo(Record).Struct.fields, 0..) |field, i| {
        if (i != 0) {
            try out.writeAll(",");
        }
        try writeCsvRecordField(out, @field(record, field.name));
    }
}

fn writeCsvRecordField(out: anytype, field: anytype) !void {
    const T = @TypeOf(field);
    if (@typeInfo(T) == .Enum) {
        return out.writeAll(@tagName(field));
    }
    switch (T) {
        u64 => return out.print("{}", .{field}),
        []const u8 => return out.writeAll(field),
        [20]u8 => return out.print("{}", .{std.fmt.fmtSliceHexLower(&field)}),
        else => @compileError("unsupported writeCsvRecordField type: " ++ @typeName(T)),
    }
}
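
// Decodes a 40-character hex commit SHA into 20 raw bytes; rejects input of
// the wrong length or with non-hex characters.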
fn parseCommit(text: []const u8) ![20]u8 {
    var result: [20]u8 = undefined;
    if (text.len != 40) {
        return error.WrongShaLength;
    }
    var i: usize = 0;
    while (i < 20) : (i += 1) {
        const byte = std.fmt.parseInt(u8, text[i * 2 ..][0..2], 16) catch {
            return error.BadShaCharacter;
        };
        result[i] = byte;
    }
    return result;
}