Add file streams for teeing Job stdout/err to disc #1456

Draft: wants to merge 5 commits into master (showing changes from 4 commits)
42 changes: 30 additions & 12 deletions share/wake/lib/system/job.wake
@@ -114,6 +114,10 @@ export tuple Plan =
export Stdout: LogLevel
# How should standard error be displayed during a build
export Stderr: LogLevel
# The workspace-relative paths to which the stdout will be appended
export TeeStdout: List String
# The workspace-relative paths to which the stderr will be appended
export TeeStderr: List String
# Echo the command to this stream
export Echo: LogLevel
# See Persistence table above
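A usage sketch (not part of this diff): wake generates setters for exported tuple fields, so assuming the generated `setPlanTeeStdout`, tee-ing a job's stdout into a workspace-relative log would look something like:

    def helloJob =
        makeExecPlan ("echo", "hello", Nil) Nil
        | setPlanTeeStdout ("build/logs/hello.stdout", Nil)
        | runJob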
@@ -227,7 +231,7 @@ export def editPlanShare (f: Boolean => Boolean): Plan => Plan =

# Get a unique hash-code for the job
export def getPlanHash (plan: Plan): Integer =
def Plan _ cmd _ env dir stdin _ _ _ _ _ _ _ _ isAtty = plan
def Plan _ cmd _ env dir stdin _ _ _ _ _ _ _ _ _ _ isAtty = plan
def isAttyStr = if isAtty then "true" else "false"

def sig =
@@ -275,7 +279,7 @@ def bToInt b =

# Set reasonable defaults for all Plan arguments
export def makeExecPlan (cmd: List String) (visible: List Path): Plan =
Plan "" cmd visible environment "." "" logInfo logWarning logEcho Share Nil defaultUsage id id False
Plan "" cmd visible environment "." "" logInfo logWarning Nil Nil logEcho Share Nil defaultUsage id id False

export def makeShellPlan (script: String) (visible: List Path): Plan =
makeExecPlan (which "dash", "-c", script, Nil) visible
@@ -571,8 +575,8 @@ export def virtualRunner: Runner =
def implode l =
cat (foldr (_, "\0", _) Nil l)

def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echo stdout stderr label isatty: Job =
def create label dir stdin env cmd signature visible keep echo stdout stderr isatty =
def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echoLevel stdoutLevel stderrLevel stdoutFiles stderrFiles label isatty: Job =
def create label dir stdin env cmd signature visible keep echoLevel stdoutLevel stderrLevel stdoutFilesFlat stderrFilesFlat isatty =
prim "job_create"

def finish job inputs outputs all_outputs status runtime cputime membytes ibytes obytes =
@@ -583,6 +587,13 @@ def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echo st
def signature cmd res fni fno keep = prim "hash"
def hash = signature cmd res finputs foutputs keep

def catTeeFiles paths =
# Null byte added here rather than via `catWith "\0"` in order to have a null byte after the
# *final* path as well, as required by `split_null`.
map ("{simplify _}\0") paths
| distinctBy scmp
| cat
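# (Illustrative note, not part of this diff: with already-simplified paths,
#   catTeeFiles ("logs/err.txt", "logs/out.txt", "logs/out.txt", Nil)
# evaluates to "logs/err.txt\0logs/out.txt\0" — duplicates collapse, and
# every path, including the last one, ends in a null byte.)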

def build Unit =
def visStrings = map getPathName vis

@@ -596,9 +607,11 @@ def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echo st
hash
visStrings.implode
(bToInt keep)
echo
stdout
stderr
echoLevel
stdoutLevel
stderrLevel
(catTeeFiles stdoutFiles)
(catTeeFiles stderrFiles)
(bToInt isatty)

def prefix = str (getJobId job)
@@ -620,6 +633,7 @@ def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echo st

def output =
foutputs outputs
| filter (\f !exists (f ==* _) (stdoutFiles ++ stderrFiles))
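# (Presumably the tee targets are filtered out of the declared outputs here
# because the runtime itself keeps them open and appends to them, so hashing
# them as ordinary job outputs could race with later tee writes.)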
| computeHashes prefix
| implode

@@ -655,10 +669,10 @@ def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echo st
Pair Nil last -> confirm False last (build Unit)

# Only run if the first four arguments differ
target runOnce cmd env dir stdin vis isatty run \ res usage finputs foutputs keep echo stdout stderr label =
runAlways cmd env dir stdin res usage finputs foutputs vis keep run echo stdout stderr label isatty
target runOnce cmd env dir stdin vis isatty run \ res usage finputs foutputs keep echoLevel stdoutLevel stderrLevel stdoutFiles stderrFiles label =
runAlways cmd env dir stdin res usage finputs foutputs vis keep run echoLevel stdoutLevel stderrLevel stdoutFiles stderrFiles label isatty

export def runJobImp label cmd env dir stdin res usage finputs foutputs vis pers run (LogLevel echo) (LogLevel stdout) (LogLevel stderr) isatty =
export def runJobImp label cmd env dir stdin res usage finputs foutputs vis pers run (LogLevel echo) (LogLevel stdout) (LogLevel stderr) stdoutFiles stderrFiles isatty =
if isOnce pers then
runOnce
cmd
@@ -676,6 +690,8 @@ export def runJobImp label cmd env dir stdin res usage finputs foutputs vis pers
echo
stdout
stderr
stdoutFiles
stderrFiles
label
else
runAlways
@@ -693,11 +709,13 @@ export def runJobImp label cmd env dir stdin res usage finputs foutputs vis pers
echo
stdout
stderr
stdoutFiles
stderrFiles
label
isatty

export def runJobWith (Runner _ _ run) (Plan label cmd vis env dir stdin stdout stderr echo pers res usage finputs foutputs isatty) =
runJobImp label cmd env dir stdin res usage finputs foutputs vis pers run echo stdout stderr isatty
export def runJobWith (Runner _ _ run) (Plan label cmd vis env dir stdin stdoutLevel stderrLevel stdoutFiles stderrFiles echoLevel pers res usage finputs foutputs isatty) =
runJobImp label cmd env dir stdin res usage finputs foutputs vis pers run echoLevel stdoutLevel stderrLevel stdoutFiles stderrFiles isatty

# Set the value of a tag on a Job
# This is useful for post-build reflection into the database
6 changes: 3 additions & 3 deletions share/wake/lib/system/path.wake
@@ -179,7 +179,7 @@ def computeHashes (prefix: String) (files: List String): List String =
def add f h = prim "add_hash"

def hashPlan cmd vis =
Plan "<hash>" cmd vis Nil "." "" logNever logError logDebug ReRun Nil hashUsage id id False
Plan "<hash>" cmd vis Nil "." "" logNever logError Nil Nil logDebug ReRun Nil hashUsage id id False

def stdin_file_path = "to_hash.{prefix}.stdin"

@@ -240,7 +240,7 @@ target hashcode (f: String): String =
reuse
else
def hashPlan cmd =
Plan "" cmd Nil Nil "." "" logNever logError logDebug ReRun Nil hashUsage id id False
Plan "" cmd Nil Nil "." "" logNever logError Nil Nil logDebug ReRun Nil hashUsage id id False

def job =
hashPlan ("<hash>", f, Nil)
@@ -259,7 +259,7 @@ target hashcode (f: String): String =
# Allow an untracked file to be removed via `wake --clean`
export target markFileCleanable (filepath: String): Result Unit Error =
def hashPlan cmd =
Plan "" cmd Nil Nil "." "" logNever logError logDebug ReRun Nil hashUsage id id False
Plan "" cmd Nil Nil "." "" logNever logError Nil Nil logDebug ReRun Nil hashUsage id id False

def job =
hashPlan ("<hash>", filepath, Nil)
10 changes: 6 additions & 4 deletions share/wake/lib/system/plan_scorer.wake
@@ -25,7 +25,7 @@ publish runner =

# Run a job, via a Runner chosen based on 'score' functions.
export def runJob (p: Plan): Job = match p
Plan label cmd vis env dir stdin stdout stderr echo pers res usage finputs foutputs isatty ->
Plan label cmd vis env dir stdin stdoutLevel stderrLevel stdoutFiles stderrFiles echoLevel pers res usage finputs foutputs isatty ->
def implode l = cat (foldr (_, "\0", _) Nil l)
def bToInt b = if b then 1 else 0

@@ -50,16 +50,18 @@

match (opts | foldl best (Pair 0.0 None) | getPairSecond)
Some r ->
runJobImp label cmd env dir stdin res usage finputs foutputs vis pers r echo stdout stderr isatty
runJobImp label cmd env dir stdin res usage finputs foutputs vis pers r echoLevel stdoutLevel stderrLevel stdoutFiles stderrFiles isatty
None ->
def create label dir stdin env cmd signature visible keep echo stdout stderr isatty =
def create label dir stdin env cmd signature visible keep echoLevel stdoutLevel stderrLevel stdoutFilesFlat stderrFilesFlat isatty =
prim "job_create"

def badfinish job e = prim "job_fail_finish"
def badlaunch job e = prim "job_fail_launch"
def stdoutFilesFlat = map ("{_}\0") stdoutFiles | cat
def stderrFilesFlat = map ("{_}\0") stderrFiles | cat

def job =
create label dir stdin env.implode cmd.implode 0 "" 0 "echo" "info" "error" (bToInt isatty)
create label dir stdin env.implode cmd.implode 0 "" 0 "echo" "info" "error" stdoutFilesFlat stderrFilesFlat (bToInt isatty)

def error =
def pretty = match _
69 changes: 56 additions & 13 deletions src/runtime/job.cpp
@@ -39,6 +39,7 @@

#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <list>
@@ -106,6 +107,7 @@ struct Job final : public GCObject<Job, Value> {
std::string echo;
std::string stream_out;
std::string stream_err;
std::string stdout_teefiles, stderr_teefiles;
HeapPointer<Value> bad_launch;
HeapPointer<Value> bad_finish;
double pathtime;
@@ -125,7 +127,7 @@

Job(Database *db_, String *label_, String *dir_, String *stdin_file_, String *environ,
String *cmdline_, bool keep, const char *echo, const char *stream_out,
const char *stream_err);
const char *stream_err, std::string stdout_teefiles, std::string stderr_teefiles);

template <typename T, T (HeapPointerBase::*memberfn)(T x)>
T recurse(T arg);
@@ -276,16 +278,20 @@ struct JobEntry {
std::list<Status>::iterator status;
std::unique_ptr<std::streambuf> stdout_linebuf;
std::unique_ptr<std::streambuf> stderr_linebuf;
std::vector<std::string> stdout_teefiles;
std::vector<std::string> stderr_teefiles;

JobEntry(JobTable::detail *imp_, RootPointer<Job> &&job_, std::unique_ptr<std::streambuf> stdout,
std::unique_ptr<std::streambuf> stderr)
std::unique_ptr<std::streambuf> stderr, std::vector<std::string> stdout_teefiles_, std::vector<std::string> stderr_teefiles_)
: imp(imp_),
job(std::move(job_)),
pid(0),
pipe_stdout(-1),
pipe_stderr(-1),
stdout_linebuf(std::move(stdout)),
stderr_linebuf(std::move(stderr)) {}
stderr_linebuf(std::move(stderr)),
stdout_teefiles(std::move(stdout_teefiles_)),
stderr_teefiles(std::move(stderr_teefiles_)) {}
~JobEntry();

double runtime(struct timespec now);
@@ -323,6 +329,8 @@ struct JobTable::detail {
std::unordered_map<int, std::unique_ptr<std::streambuf>> fd_bufs;
std::unordered_map<int, std::unique_ptr<TermInfoBuf>> term_bufs;

std::unordered_map<std::string, std::unique_ptr<std::ofstream>> teefiles;
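// (Note, not part of the diff: this map holds one shared ofstream per tee
// path, so jobs naming the same file write through a single open stream
// that stays alive for the whole run.)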

detail() {}

~detail() {
@@ -339,6 +347,9 @@
for (auto &entry : term_bufs) {
entry.second.release();
}
for (auto &entry : teefiles) {
entry.second->close();
}
Comment on lines +350 to +352

Contributor: These close on their own, no need to write this part. Currently there are no release-order deps, so we don't need to be explicit like I was above.

Collaborator (Author): Good to know!
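A minimal sketch of the reviewer's point (illustrative, not code from this PR): std::ofstream flushes and closes itself in its destructor, so tearing down the owning map is enough.

    #include <fstream>
    #include <memory>
    #include <string>
    #include <unordered_map>

    int main() {
      std::unordered_map<std::string, std::unique_ptr<std::ofstream>> teefiles;
      teefiles["a.log"] = std::make_unique<std::ofstream>("a.log");
      *teefiles["a.log"] << "hello\n";
      // No explicit close() loop needed: when the map is destroyed, each
      // unique_ptr deletes its ofstream, whose destructor flushes and closes.
      return 0;
    }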

}

CriticalJob critJob(double nexttime) const;
@@ -783,7 +794,6 @@ static void launch(JobTable *jobtable) {
// Make the raw output streams and the TermInfoBufs
// that jobs will use. We make one TermInfoBuf per
// file descriptor that we're outputting to.
// TODO: We could add file tee-ing here as well
if (!jobtable->imp->fd_bufs.count(fd_out)) {
std::unique_ptr<std::streambuf> fd_buf;
if (fd_out != -1) {
@@ -824,8 +834,28 @@
} else {
err = std::make_unique<NullBuf>();
}

auto stdout_teefiles = split_null(task.job->stdout_teefiles);
auto stdout_tee_names = std::vector<std::string>();
for (int i = 0; stdout_teefiles[i]; ++i) {
stdout_tee_names.push_back(stdout_teefiles[i]);
if (jobtable->imp->teefiles.find(stdout_teefiles[i]) == jobtable->imp->teefiles.end()) {
jobtable->imp->teefiles[stdout_teefiles[i]] = std::make_unique<std::ofstream>(stdout_teefiles[i], std::ios::out | std::ios::trunc);
Contributor: We should do an unlink after this so that we get a new inode instead of modifying the existing file.

Collaborator (Author): I'm not seeing that function in the C++ reference I'm using, though I do recognize what it means in context. Mind giving me a bit more guidance on what that would look like in code?

Contributor: (link to unlink's man page)

Collaborator (Author): Apparently it's a bit less complete a reference than I expected, thanks! (They could at least have included it and marked it as system dependent somehow...)

Contributor:

    unlink(file.c_str());

is all you would need, but it's probably best to only ignore the error if ENOENT is returned. Everything else should be logged as a warning:

    if (unlink(file.c_str()) < 0 && errno != ENOENT) {
        wcl::log::warning("Could not unlink %s while creating a new file for tee-ing: %s", file.c_str(), strerror(errno))();
    }
}
}
auto stderr_teefiles = split_null(task.job->stderr_teefiles);
auto stderr_tee_names = std::vector<std::string>();
for (int i = 0; stderr_teefiles[i]; ++i) {
stderr_tee_names.push_back(stderr_teefiles[i]);
if (jobtable->imp->teefiles.find(stderr_teefiles[i]) == jobtable->imp->teefiles.end()) {
jobtable->imp->teefiles[stderr_teefiles[i]] = std::make_unique<std::ofstream>(stderr_teefiles[i], std::ios::out | std::ios::trunc);
}
}
delete[] stdout_teefiles;
delete[] stderr_teefiles;
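// (Sketch of the assumed contract of split_null, whose definition is not
// shown in this diff: it parses "a\0b\0" into a heap-allocated,
// null-terminated array {"a", "b", nullptr}; the loops above walk entries
// until the nullptr sentinel, and delete[] releases the array.)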

std::shared_ptr<JobEntry> entry = std::make_shared<JobEntry>(
Contributor: Better to store a std::vector<std::ostream*> that acts as a list of weak pointers to the unique pointers stored by the job table, which outlives these references. That way you don't have to do the job table string lookup every time, only at job launch time.

Contributor:

    auto stdout_tee_names = std::vector<std::ostream*>();
    for (const auto& file : stdout_teefiles) {
      auto iter = jobtable->imp->teefiles.find(file);
      if (iter == jobtable->imp->teefiles.end()) {
        auto out = std::make_unique<std::ofstream>(file, std::ios::out);
        iter = jobtable->imp->teefiles.emplace(file, std::move(out)).first;
      }
      stdout_tee_names.push_back(iter->second.get());
    }

Collaborator (Author): That's a nice little tweak that makes me glad I don't write C++ on the regular...
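The raw std::ostream* entries stay valid because JobTable::detail owns the streams and outlives every JobEntry, so each write can go straight through the pointer with no per-write map lookup.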

jobtable->imp.get(), std::move(task.job), std::move(out), std::move(err));
jobtable->imp.get(), std::move(task.job), std::move(out), std::move(err), std::move(stdout_tee_names), std::move(stderr_tee_names));

int stdout_stream[2];
int stderr_stream[2];
@@ -983,6 +1013,10 @@ bool JobTable::wait(Runtime &runtime) {
entry->job->db->save_output(entry->job->job, 1, buffer, got, entry->runtime(now));
if (!imp->batch) {
entry->stdout_linebuf->sputn(buffer, got);
for (auto &teefile : entry->stdout_teefiles) {
auto &tee_fd = imp->teefiles.at(teefile);
Contributor: nit: We generally use operator[] and not at() in this code base.
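(For reference: at() bounds-checks and throws std::out_of_range on a missing key, while operator[] default-constructs a missing entry; launch() has already inserted the key by this point, so either lookup succeeds.)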

tee_fd->write(buffer, got);
}
}
}
}
@@ -1002,6 +1036,10 @@
entry->job->db->save_output(entry->job->job, 2, buffer, got, entry->runtime(now));
if (!imp->batch) {
entry->stderr_linebuf->sputn(buffer, got);
for (auto &teefile : entry->stderr_teefiles) {
auto &tee_fd = imp->teefiles.at(teefile);
tee_fd->write(buffer, got);
}
}
}
}
@@ -1083,7 +1121,7 @@

Job::Job(Database *db_, String *label_, String *dir_, String *stdin_file_, String *environ,
String *cmdline_, bool keep_, const char *echo_, const char *stream_out_,
const char *stream_err_)
const char *stream_err_, std::string stdout_teefiles_, std::string stderr_teefiles_)
: db(db_),
label(label_),
cmdline(cmdline_),
@@ -1096,7 +1134,9 @@ Job::Job(Database *db_, String *label_, String *dir_, String *stdin_file_, Strin
keep(keep_),
echo(echo_),
stream_out(stream_out_),
stream_err(stream_err_) {
stream_err(stream_err_),
stdout_teefiles(stdout_teefiles_),
stderr_teefiles(stderr_teefiles_) {
start.tv_sec = stop.tv_sec = 0;
start.tv_nsec = stop.tv_nsec = 0;

@@ -1286,18 +1326,19 @@ static PRIMFN(prim_job_virtual) {
}

static PRIMTYPE(type_job_create) {
return args.size() == 12 && args[0]->unify(Data::typeString) &&
return args.size() == 14 && args[0]->unify(Data::typeString) &&
args[1]->unify(Data::typeString) && args[2]->unify(Data::typeString) &&
args[3]->unify(Data::typeString) && args[4]->unify(Data::typeString) &&
args[5]->unify(Data::typeInteger) && args[6]->unify(Data::typeString) &&
args[7]->unify(Data::typeInteger) && args[8]->unify(Data::typeString) &&
args[9]->unify(Data::typeString) && args[10]->unify(Data::typeString) &&
args[11]->unify(Data::typeInteger) && out->unify(Data::typeJob);
args[11]->unify(Data::typeString) && args[12]->unify(Data::typeString) &&
args[13]->unify(Data::typeInteger) && out->unify(Data::typeJob);
}

static PRIMFN(prim_job_create) {
JobTable *jobtable = static_cast<JobTable *>(data);
EXPECT(12);
EXPECT(14);
STRING(label, 0);
STRING(dir, 1);
STRING(stdin_file, 2);
@@ -1309,15 +1350,17 @@ static PRIMFN(prim_job_create) {
STRING(echo, 8);
STRING(stream_out, 9);
STRING(stream_err, 10);
INTEGER_MPZ(is_atty, 11);
STRING(stdout_teefiles, 11);
STRING(stderr_teefiles, 12);
INTEGER_MPZ(is_atty, 13);

Hash hash;
REQUIRE(mpz_sizeinbase(signature, 2) <= 8 * sizeof(hash.data));
mpz_export(&hash.data[0], 0, 1, sizeof(hash.data[0]), 0, 0, signature);

Job *out =
Job::alloc(runtime.heap, jobtable->imp->db, label, dir, stdin_file, env, cmd,
mpz_cmp_si(keep, 0), echo->c_str(), stream_out->c_str(), stream_err->c_str());
mpz_cmp_si(keep, 0), echo->c_str(), stream_out->c_str(), stream_err->c_str(), stdout_teefiles->as_str(), stderr_teefiles->as_str());

out->record = jobtable->imp->db->predict_job(out->code.data[0], &out->pathtime);

@@ -1396,7 +1439,7 @@ static PRIMFN(prim_job_cache) {
Value *joblist;
if (reuse.found && !jobtable->imp->check) {
Job *jobp = Job::claim(runtime.heap, jobtable->imp->db, dir, dir, stdin_file, env, cmd, true,
STREAM_ECHO, STREAM_INFO, STREAM_WARNING);
STREAM_ECHO, STREAM_INFO, STREAM_WARNING, "", "");
jobp->state = STATE_FORKED | STATE_STDOUT | STATE_STDERR | STATE_MERGED | STATE_FINISHED;
jobp->job = job;
jobp->record = reuse;