feat: allow performance benchmarks to run using tree-sitter
Ellpeck committed Dec 17, 2024
1 parent b80aeaa commit cf292e4
Showing 6 changed files with 35 additions and 18 deletions.
src/benchmark/slicer.ts (32 changes: 20 additions & 12 deletions)
@@ -92,22 +92,20 @@ export class BenchmarkSlicer {
 private readonly commonMeasurements = new Measurements<CommonSlicerMeasurements>();
 private readonly perSliceMeasurements = new Map<SlicingCriteria, PerSliceStats>();
 private readonly deltas = new Map<CommonSlicerMeasurements, BenchmarkMemoryMeasurement>();
-private readonly parser: KnownParser;
-private stats: SlicerStats | undefined;
-private loadedXml: KnownParserType | undefined;
-private dataflow: DataflowInformation | undefined;
-private normalizedAst: NormalizedAst | undefined;
-private totalStopwatch: IStoppableStopwatch;
+private readonly parserName: KnownParserName;
+private stats: SlicerStats | undefined;
+private loadedXml: KnownParserType | undefined;
+private dataflow: DataflowInformation | undefined;
+private normalizedAst: NormalizedAst | undefined;
+private totalStopwatch: IStoppableStopwatch;
 private finished = false;
 // Yes, this is unclean, but we know that we assign the executor during the initialization and this saves us from having to check for nullability every time
-private executor: PipelineExecutor<SupportedPipelines> = null as unknown as PipelineExecutor<SupportedPipelines>;
+private executor: PipelineExecutor<SupportedPipelines> = null as unknown as PipelineExecutor<SupportedPipelines>;
+private parser: KnownParser = null as unknown as KnownParser;
 
-constructor(parser: KnownParserName) {
+constructor(parserName: KnownParserName) {
 this.totalStopwatch = this.commonMeasurements.start('total');
-this.parser = this.commonMeasurements.measure(
-'initialize R session',
-() => parser === 'r-shell' ? new RShell() : new TreeSitterExecutor()
-);
+this.parserName = parserName;
 }
 
 /**
@@ -118,6 +116,16 @@ export class BenchmarkSlicer {
 guard(this.stats === undefined, 'cannot initialize the slicer twice');
 
 // we know these are in sync so we just cast to one of them
+this.parser = await this.commonMeasurements.measure(
+'initialize R session', async() => {
+if(this.parserName === 'r-shell') {
+return new RShell();
+} else {
+await TreeSitterExecutor.initTreeSitter();
+return new TreeSitterExecutor();
+}
+}
+);
 this.executor = createSlicePipeline(this.parser, {
 request: { ...request },
 criterion: [],
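In effect, the parser is no longer created in the constructor but lazily in init(), which also awaits TreeSitterExecutor.initTreeSitter() for the tree-sitter backend. A minimal usage sketch, assuming 'tree-sitter' is the KnownParserName for that backend and using a hypothetical input file:

// sketch only: the parser name 'tree-sitter' and the file path are illustrative assumptions
const slicer = new BenchmarkSlicer('tree-sitter');            // constructor now only records the parser name
await slicer.init({ request: 'file', content: 'example.R' }); // parser (RShell or TreeSitterExecutor) is created here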
src/cli/benchmark-app.ts (5 changes: 4 additions & 1 deletion)
@@ -6,6 +6,7 @@ import { log } from '../util/log';
 import { LimitedThreadPool } from '../util/parallel';
 import { processCommandLineArgs } from './common/script';
 import type { RParseRequestFromFile } from '../r-bridge/retriever';
+import type { KnownParserName } from '../r-bridge/parser';
 
 export interface BenchmarkCliOptions {
 verbose: boolean
@@ -16,6 +17,7 @@ export interface BenchmarkCliOptions {
 parallel: number
 limit?: number
 runs?: number
+parser: KnownParserName
 }
 
 
@@ -75,7 +77,8 @@ async function benchmark() {
 '--input', f.request.content,
 '--file-id', `${i}`,
 '--output', path.join(options.output, path.relative(f.baseDir, `${f.request.content}.json`)),
-'--slice', options.slice, ...verboseAdd]);
+'--slice', options.slice, ...verboseAdd,
+'--parser', options.parser]);
 
 const runs = options.runs ?? 1;
 for(let i = 1; i <= runs; i++) {
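The benchmark CLI now forwards the chosen parser to each helper process via the new --parser flag. A hedged invocation example (the value 'tree-sitter' and the paths are assumptions; the flag defaults to 'r-shell'):

# sketch: benchmark a folder of R files using the tree-sitter parser
npm run benchmark -- --input ./files --parser tree-sitter --output benchmark-results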
src/cli/benchmark-helper-app.ts (4 changes: 3 additions & 1 deletion)
@@ -7,6 +7,7 @@ import type { RParseRequestFromFile } from '../r-bridge/retriever';
 import { BenchmarkSlicer } from '../benchmark/slicer';
 import { DefaultAllVariablesFilter } from '../slicing/criterion/filters/all-variables';
 import path from 'path';
+import type { KnownParserName } from '../r-bridge/parser';
 
 
 export interface SingleBenchmarkCliOptions {
@@ -17,6 +18,7 @@
 'run-num'?: number
 slice: string
 output?: string
+parser: KnownParserName
 }
 
 const options = processCommandLineArgs<SingleBenchmarkCliOptions>('benchmark-helper', [],{
@@ -53,7 +55,7 @@ async function benchmark() {

 const request: RParseRequestFromFile = { request: 'file', content: options.input };
 
-const slicer = new BenchmarkSlicer('r-shell');
+const slicer = new BenchmarkSlicer(options.parser);
 try {
 await slicer.init(request);
 
src/cli/common/options.ts (4 changes: 3 additions & 1 deletion)
@@ -29,7 +29,8 @@ export const benchmarkOptions: OptionDefinition[] = [
 { name: 'input', alias: 'i', type: String, description: 'Pass a folder or file as src to read from', multiple: true, defaultOption: true, defaultValue: [], typeLabel: '{underline files/folders}' },
 { name: 'parallel', alias: 'p', type: String, description: 'Number of parallel executors (defaults to {italic max(cpu.count-1, 1)})', defaultValue: Math.max(os.cpus().length - 1, 1), typeLabel: '{underline number}' },
 { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
-{ name: 'output', alias: 'o', type: String, description: `Directory to write all the measurements to in a per-file-basis (defaults to {italic benchmark-${StartTimeString}})`, defaultValue: `benchmark-${StartTimeString}`, typeLabel: '{underline file}' }
+{ name: 'output', alias: 'o', type: String, description: `Directory to write all the measurements to in a per-file-basis (defaults to {italic benchmark-${StartTimeString}})`, defaultValue: `benchmark-${StartTimeString}`, typeLabel: '{underline file}' },
+{ name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' }
 ];
 
 export const benchmarkHelperOptions: OptionDefinition[] = [
@@ -40,6 +41,7 @@ export const benchmarkHelperOptions: OptionDefinition[] = [
 { name: 'run-num', alias: 'r', type: Number, description: 'The n-th time that the file with the given file-id is being benchmarked' },
 { name: 'slice', alias: 's', type: String, description: 'Automatically slice for *all* variables (default) or *no* slicing and only parsing/dataflow construction. Numbers will indicate: sample X random slices from all.', defaultValue: 'all', typeLabel: '{underline all/no}' },
 { name: 'output', alias: 'o', type: String, description: 'File to write the measurements to (appends a single line in JSON format)', typeLabel: '{underline file}' },
+{ name: 'parser', type: String, description: 'The parser to use for the benchmark', defaultValue: 'r-shell', typeLabel: '{underline parser}' }
 ];
 
 export const exportQuadsOptions: OptionDefinition[] = [
test/performance/run-all-suites.sh (3 changes: 2 additions & 1 deletion)
@@ -7,6 +7,7 @@ PARALLEL="${1-1}"
 # default to running 1 time
 RUNS="${2-1}"
 ONLY_SUITE="${3-}"
+PARSER="${4-"r-shell"}"
 
 
 SUITE_PREFIX="suite-"
@@ -25,7 +26,7 @@ fi

 for SUITE in "${SUITES[@]}"; do
 mkdir -p "${OUTPUT_DIR}/${SUITE}"
-CMD=(bash run-suite.sh "${SUITE}" "$(pwd)/${OUTPUT_DIR}/${SUITE}/${SUITE}" "${PARALLEL}" "${RUNS}")
+CMD=(bash run-suite.sh "${SUITE}" "$(pwd)/${OUTPUT_DIR}/${SUITE}/${SUITE}" "${PARALLEL}" "${RUNS}" "${PARSER}")
 echo -e "Suite-Command: \"${CMD[*]}\"..."
 "${CMD[@]}"
 done
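run-all-suites.sh now accepts the parser as an optional fourth positional argument and forwards it to run-suite.sh. An illustrative call (the parser value 'tree-sitter' is an assumption; it falls back to 'r-shell' when omitted):

# 4 parallel executors, 2 runs per file, no suite filter, tree-sitter parser
bash run-all-suites.sh 4 2 "" tree-sitter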
test/performance/run-suite.sh (5 changes: 3 additions & 2 deletions)
@@ -3,7 +3,7 @@
 ### Gets a suite name benchmarks the complete suite using the `benchmark` script and summarizes the results.
 
 if [[ -z "$1" || -z "$2" ]]; then
-printf "No suite name or output file given.\nUsage: %s <suite-name> <output-file> (<process-count>) (<amount-of-runs>)\n" "$0"
+printf "No suite name or output file given.\nUsage: %s <suite-name> <output-file> (<process-count>) (<amount-of-runs>) (<parser-to-use>)\n" "$0"
 exit 1
 fi
 
@@ -17,6 +17,7 @@ RAW_OUTPUT="${OUT_BASE}-raw"
 PARALLEL="${3-1}"
 # default to running 1 time
 RUNS="${4-1}"
+PARSER="${5-"r-shell"}"
 
 SUITE="suite-${SUITE_NAME}"
 SETUP_SCRIPT="setup.sh"
@@ -37,7 +38,7 @@ echo "done."
 FILES_DIR="$(pwd)/files/"
 
 ## run the benchmark script for each file
-CMD=(npm run benchmark -- --parallel "${PARALLEL}" --runs "${RUNS}" --output "${RAW_OUTPUT}" "${FILES_DIR}")
+CMD=(npm run benchmark -- --parallel "${PARALLEL}" --runs "${RUNS}" --output "${RAW_OUTPUT}" --parser "${PARSER}" "${FILES_DIR}")
 
 echo -e " * Running: \"${CMD[*]}\"...\033[33m"
 "${CMD[@]}"
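Likewise, a single suite can be benchmarked directly with the parser as the new fifth argument (the suite name and output path below are hypothetical):

# <suite-name> <output-file> <process-count> <amount-of-runs> <parser-to-use>
bash run-suite.sh example /tmp/example-results 2 1 tree-sitter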
