From ad43efb1126e06d53b01c43e466c2bca2cec31e6 Mon Sep 17 00:00:00 2001
From: Martin Treml <martin.treml@symflower.com>
Date: Thu, 1 Aug 2024 14:07:38 +0200
Subject: [PATCH 1/3] Always run a docker pull when executing the evaluation to
 be on the latest state

Part of #302
---
 cmd/eval-dev-quality/cmd/evaluate.go | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
index fc6f7635..f932ebd8 100644
--- a/cmd/eval-dev-quality/cmd/evaluate.go
+++ b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -594,6 +594,23 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
 		}()
 	}
 
+	// Pull the image to ensure using the latest version
+	{
+		ctx.Log.Printf("Try pulling %s", command.RuntimeImage)
+		cmd := []string{
+			"docker",
+			"pull",
+			command.RuntimeImage,
+		}
+
+		commandOutput, err := util.CommandWithResult(context.Background(), command.logger, &util.Command{
+			Command: cmd,
+		})
+		if err != nil {
+			ctx.Log.Error("ERROR: Unable to pull image", pkgerrors.WithMessage(pkgerrors.WithStack(err), commandOutput))
+		}
+	}
+
 	// Convert the repositories from the context back to command line arguments.
 	repositoryArgs := make([]string, len(ctx.RepositoryPaths)*2)
 	for i := 0; i < len(repositoryArgs); i = i + 2 {

From a250cff8e56fdab2d3f81beea3da3e3d837af219 Mon Sep 17 00:00:00 2001
From: Simon Bauer <simon.bauer@symflower.com>
Date: Fri, 2 Aug 2024 09:06:18 +0200
Subject: [PATCH 2/3] documentation, Clarify why specifying arguments in
 execution tests overwrites the ones being set in the validation

---
 cmd/eval-dev-quality/cmd/evaluate_test.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
index ecac52bf..1b56e2fd 100644
--- a/cmd/eval-dev-quality/cmd/evaluate_test.go
+++ b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -113,6 +113,8 @@ func TestEvaluateExecute(t *testing.T) {
 		Before func(t *testing.T, logger *log.Logger, resultPath string)
 		After  func(t *testing.T, logger *log.Logger, resultPath string)
 
+		// Arguments holds the command line arguments.
+		// REMARK The "--testdata" and "--result-directory" options are set within the validation logic but specifying them in the argument list here overwrites them.
 		Arguments []string
 
 		ExpectedOutputValidate func(t *testing.T, output string, resultPath string)
@@ -149,7 +151,7 @@ func TestEvaluateExecute(t *testing.T) {
 				"evaluate",
 				"--result-path", filepath.Join(temporaryPath, "result-directory"),
 				"--testdata", filepath.Join("..", "..", "..", "testdata"),
-			}, tc.Arguments...)
+			}, tc.Arguments...) // Add the test case arguments last which allows overwriting "--testdata" and "--result-path" as only the last option counts if specified multiple times (https://pkg.go.dev/github.com/jessevdk/go-flags).
 
 			if tc.ExpectedPanicContains == "" {
 				assert.NotPanics(t, func() {

From 6034372f2c6f59af78eb26088aae411a23d1eb66 Mon Sep 17 00:00:00 2001
From: Martin Treml <martin.treml@symflower.com>
Date: Fri, 2 Aug 2024 09:23:52 +0200
Subject: [PATCH 3/3] fix, Use abolute paths for copying from docker cause
 otherwise docker gets confused with result paths containin colons

Fixes #302
---
 cmd/eval-dev-quality/cmd/evaluate.go      |  6 +++-
 cmd/eval-dev-quality/cmd/evaluate_test.go | 43 +++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
index f932ebd8..264ae29d 100644
--- a/cmd/eval-dev-quality/cmd/evaluate.go
+++ b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -696,12 +696,16 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) {
 		}()
 
 		// Copy data from volume to filesystem.
+		resultPath, err := filepath.Abs(command.ResultPath)
+		if err != nil {
+			return pkgerrors.Wrap(err, "could not create an absolute path")
+		}
 		output, err = util.CommandWithResult(context.Background(), command.logger, &util.Command{
 			Command: []string{
 				"docker",
 				"cp",
 				"volume-fetch:/data/.",
-				command.ResultPath,
+				resultPath,
 			},
 		})
 		if err != nil {
diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
index 1b56e2fd..a8d72d66 100644
--- a/cmd/eval-dev-quality/cmd/evaluate_test.go
+++ b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -179,6 +179,10 @@ func TestEvaluateExecute(t *testing.T) {
 
 			actualResultFiles, err := osutil.FilesRecursive(temporaryPath)
 			require.NoError(t, err)
+			if len(tc.ExpectedResultFiles) == 0 && len(actualResultFiles) == 0 {
+				return
+			}
+
 			for i, p := range actualResultFiles {
 				actualResultFiles[i], err = filepath.Rel(temporaryPath, p)
 				require.NoError(t, err)
@@ -1058,6 +1062,45 @@ func TestEvaluateExecute(t *testing.T) {
 				},
 			},
 		})
+		{
+			relativeResultDirectory := "temp:test:results"
+			validate(t, &testCase{
+				Name: "Docker with colon in relative results path",
+
+				Before: func(t *testing.T, logger *log.Logger, resultPath string) {
+					t.Cleanup(func() {
+						if _, err := os.Stat(relativeResultDirectory); err != nil {
+							if os.IsNotExist(err) {
+								return
+							}
+							require.NoError(t, err)
+						}
+
+						require.NoError(t, os.RemoveAll(relativeResultDirectory))
+					})
+				},
+
+				Arguments: []string{
+					"--runtime", "docker",
+					"--model", "symflower/symbolic-execution",
+					"--testdata", "testdata/", // Our own tests set the "testdata" argument to the temporary directory that they create. This temporary directory does not exist in docker, so set the "testdata" manually here to overrule the testing behavior and use the original one.
+					"--repository", filepath.Join("golang", "plain"),
+					"--runs=1",
+					"--parallel=1",
+					"--runtime-image=" + dockerImage,
+					"--result-path", relativeResultDirectory,
+				},
+
+				After: func(t *testing.T, logger *log.Logger, resultPath string) {
+					assert.FileExists(t, filepath.Join(relativeResultDirectory, "evaluation.log"))
+					symflowerLogFilePath := filepath.Join(relativeResultDirectory, "symflower_symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain", "evaluation.log")
+					require.FileExists(t, symflowerLogFilePath)
+					data, err := os.ReadFile(symflowerLogFilePath)
+					require.NoError(t, err)
+					assert.Contains(t, string(data), `Evaluating model "symflower/symbolic-execution"`)
+				},
+			})
+		}
 	})
 
 	// This case checks a beautiful bug where the Markdown export crashed when the current working directory contained a README.md file. While this is not the case during the tests (as the current work directory is the directory of this file), it certainly caused problems when our binary was executed from the repository root (which of course contained a README.md). Therefore, we sadly have to modify the current work directory right within the tests of this case to reproduce the problem and fix it forever.