From 798eaa531902ac8a5dfb1b555e9bdbf98af44d28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Thu, 21 Oct 2021 13:18:04 +0300
Subject: [PATCH 01/13] Add scripts for regenerating the expected outputs for
 adaptive tests

---
 tests/_self-adaptive/gen-costs.py             | 176 ++++++++++++++++++
 .../regenerate-expected-outputs.sh            |  35 ++++
 2 files changed, 211 insertions(+)
 create mode 100755 tests/_self-adaptive/gen-costs.py
 create mode 100755 tests/_self-adaptive/regenerate-expected-outputs.sh

diff --git a/tests/_self-adaptive/gen-costs.py b/tests/_self-adaptive/gen-costs.py
new file mode 100755
index 00000000..afcf993b
--- /dev/null
+++ b/tests/_self-adaptive/gen-costs.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+
+import argparse as ag
+import os
+import sys
+import glob
+import subprocess as sp
+import re
+import tempfile
+import shutil
+import dataclasses
+from dataclasses import dataclass
+
+@dataclass
+class MarianTrainConfig:
+    marian_dir: str
+    model: str
+    vocab1: str
+    vocab2: str
+    dim_vocab1: int
+    dim_vocab2: int
+    epochs: int
+
+
+def main():
+    parser = ag.ArgumentParser()
+    parser.add_argument('-t', '--train-sets',
+                        type=ag.FileType('r', encoding='utf-8'), nargs=2, required=True)
+    parser.add_argument('-i', '--input', type=ag.FileType('r', encoding='utf-8'), required=True)
+    parser.add_argument('-m', '--model', required=True)
+    parser.add_argument('-v', '--vocabs', nargs=2, required=True)
+    parser.add_argument('-e', '--epochs', type=int, required=True)
+    parser.add_argument('--marian-dir', required=True)
+    parser.add_argument('--output-costs', type=ag.FileType('w', encoding='utf-8'))
+    parser.add_argument('--output-transl', type=ag.FileType('w', encoding='utf-8'))
+
+    args = parser.parse_args()
+
+    [sfile, tfile] = args.train_sets
+    [vocab1, vocab2] = args.vocabs
+    config = MarianTrainConfig(args.marian_dir, args.model, vocab1, vocab2, 85000, 85000, args.epochs)
+    costs_and_translations = iterate_over_inputs(config, sfile, tfile, args.input)
+    output_costs_and_translations(costs_and_translations, args.output_costs, args.output_transl)
+
+
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+def iterate_over_inputs(config, source, target, inputs):
+    all_costs_and_translations = []
+    files_removed = True
+    has_context = False
+    try:
+        for sline, tline in zip(source, target):
+            if files_removed:
+                sfile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False)
+                tfile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False)
+                eprint(f"Created temp files for training data: {sfile.name}, {tfile.name}")
+                files_removed = False
+                has_context = False
+
+            if sline != "\n" or tline != "\n":
+                eprint("NOOOOOOOO")
+                eprint(sline)
+                eprint(tline)
+                sfile.write(sline)
+                tfile.write(tline)
+                has_context = True
+            else:
+                sfile.close()
+                tfile.close()
+                eprint("AAAAAA")
+                input_line = inputs.readline()
+                if has_context:
+                    costs_and_translations = run_marian(sfile.name, tfile.name, input_line, config)
+                else:
+                    eprint("No context provided, skipping training")
+                    translations = translate_marian(input_line, config)
+                    costs_and_translations = [([], t) for t in translations]
+                all_costs_and_translations.append(costs_and_translations)
+                os.remove(sfile.name)
+                os.remove(tfile.name)
+                files_removed = True
+        # If the files didn't end with a newline, marian wasn't run for the last set of sentences
+        if not files_removed:
+            sfile.close()
+            tfile.close()
+            eprint("AAAAAA")
+            input_line = inputs.readline()
+            if has_context:
+                costs_and_translations = run_marian(sfile.name, tfile.name, input_line, config)
+            else:
+                eprint("No context provided, skipping training")
+                translations = translate_marian(input_line, config)
+                costs_and_translations = [([], t) for t in translations]
+            all_costs_and_translations.append(costs_and_translations)
+            os.remove(sfile.name)
+            os.remove(tfile.name)
+    finally:
+        for f in [sfile, tfile]:
+            if f is not None and os.path.exists(f.name):
+                if not f.closed:
+                    f.close()
+                os.remove(f.name)
+                eprint(f"ERROR: Needed cleanup for {f.name}")
+    return all_costs_and_translations
+
+def run_marian(sfile, tfile, input_line, config):
+    """Adapt a temp copy of config.model on sfile/tfile, translate input_line with it; return (costs, translations)."""
+    temp_model_path = create_temp_model_copy(config.model)
+    new_config = dataclasses.replace(config, model=temp_model_path)
+    costs = train_marian(sfile, tfile, new_config)
+    translations = translate_marian(input_line, new_config)  # use the adapted copy before it is deleted
+    for path in glob.glob(f"{temp_model_path}*"):
+        eprint(f"Removing model file: {path}")
+        os.remove(path)
+    return (costs, translations)
+
+def create_temp_model_copy(model):
+    fd, path = tempfile.mkstemp(suffix='.npz')
+    eprint(f"Created temp file for model: {path}")
+    os.close(fd)
+    shutil.copyfile(model, path)
+    return path
+
+def train_marian(sfile, tfile, config):
+    c = config
+    process = sp.run([f"{c.marian_dir}/marian", '-m', c.model, '--disp-freq', '1', '--type', 'amun', '-v',
+                      c.vocab1, '-v', c.vocab2, '--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2), '--dim-emb', '500',
+                      '--after-epochs', str(c.epochs), '--mini-batch', '1', '-t', sfile, tfile], capture_output=True, text=True)
+    eprint("STDOUT:")
+    eprint(process.stdout)
+    eprint("STDERR:")
+    eprint(process.stderr)
+    costs = extract_costs(process.stderr)
+    eprint("COSTS:")
+    eprint(costs)
+    return costs
+
+def extract_costs(output_log):
+    """Return the cost strings from the 'Ep. ... Cost <value> ...: Time' lines of output_log."""
+    # Raw string: a backslash-dot in a plain literal is an invalid escape sequence.
+    p = re.compile(r'Ep\..* Cost ([-e0-9.]+) .*: Time')
+    matches = (p.search(line) for line in output_log.splitlines())
+    # One captured value per matching log line, kept in log order.
+    costs = [m.group(1) for m in matches if m is not None]
+    return costs
+
+def translate_marian(input_line, config):
+    c = config
+    process = sp.run([f"{c.marian_dir}/marian-decoder", '-m', c.model,
+                      '--type', 'amun', '-v', c.vocab1, '-v', c.vocab2,
+                      '--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2),
+                      '--dim-emb', '500'], input=input_line, capture_output=True, text=True)
+    eprint("STDOUT:")
+    eprint(process.stdout)
+    eprint("STDERR:")
+    eprint(process.stderr)
+    translations = process.stdout.splitlines()
+    eprint(translations)
+    return translations
+
+def output_costs_and_translations(costs_and_translations, output_costs, output_transl):
+    eprint("COSTS AND TRANSLATIONS:")
+    eprint(costs_and_translations)
+
+    if output_costs is not None:
+        all_costs = [cost for costs, _ in costs_and_translations for cost in costs]
+        output_costs.write('\n'.join(all_costs))
+    if output_transl is not None:
+        all_translations = [translation for _, translations in costs_and_translations for translation in translations]
+        output_transl.write('\n'.join(all_translations))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/_self-adaptive/regenerate-expected-outputs.sh b/tests/_self-adaptive/regenerate-expected-outputs.sh
new file mode 100755
index 00000000..de0753e1
--- /dev/null
+++ b/tests/_self-adaptive/regenerate-expected-outputs.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -euo pipefail
+
+MRT_MODELS=../../models
+MRT_TOOLS=../../tools
+
+MODELS=$MRT_MODELS/wmt16_systems/en-de
+
+./gen-costs.py \
+    -t ubuntu.oracle_2s1e.{src,ref} \
+    -m $MODELS/model.npz \
+    -v $MODELS/vocab.{en,de}.json \
+    -e 1 \
+    --marian-dir ~/prog/cpp/marian-adaptive/build/ \
+    -i ubuntu.src \
+    --output-costs costs.expected \
+    --output-transl oracle.expected
+
+# Generate BLEU
+$MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl -lc ubuntu.ref < oracle.expected > oracle.bleu.expected
+
+
+
+./gen-costs.py \
+    -t ubuntu.oracle_2s1e.{src,ref} \
+    -m $MODELS/model.npz \
+    -v $MODELS/vocab.{en,de}.json \
+    -e 1 \
+    --marian-dir ~/prog/cpp/marian-adaptive/build/ \
+    -i ubuntu.src \
+    --output-costs costs.expected \
+    --output-transl oracle.expected
+
+# Generate BLEU
+$MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl -lc ubuntu.ref < oracle.expected2 > oracle.bleu.expected2

From edb6522697895637cb1b1a929de5ca7b5b14eebe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Thu, 21 Oct 2021 14:20:37 +0300
Subject: [PATCH 02/13] Factor out training file generation into a generator

---
 tests/_self-adaptive/gen-costs.py | 73 ++++++++++++++-----------------
 1 file changed, 34 insertions(+), 39 deletions(-)

diff --git a/tests/_self-adaptive/gen-costs.py b/tests/_self-adaptive/gen-costs.py
index afcf993b..54a09130 100755
--- a/tests/_self-adaptive/gen-costs.py
+++ b/tests/_self-adaptive/gen-costs.py
@@ -46,48 +46,43 @@ def main():
 def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
-def iterate_over_inputs(config, source, target, inputs):
-    all_costs_and_translations = []
-    files_removed = True
-    has_context = False
-    try:
-        for sline, tline in zip(source, target):
-            if files_removed:
-                sfile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False)
-                tfile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False)
-                eprint(f"Created temp files for training data: {sfile.name}, {tfile.name}")
-                files_removed = False
-                has_context = False
-
-            if sline != "\n" or tline != "\n":
-                eprint("NOOOOOOOO")
-                eprint(sline)
-                eprint(tline)
-                sfile.write(sline)
-                tfile.write(tline)
-                has_context = True
-            else:
-                sfile.close()
-                tfile.close()
-                eprint("AAAAAA")
-                input_line = inputs.readline()
-                if has_context:
-                    costs_and_translations = run_marian(sfile.name, tfile.name, input_line, config)
-                else:
-                    eprint("No context provided, skipping training")
-                    translations = translate_marian(input_line, config)
-                    costs_and_translations = [([], t) for t in translations]
-                all_costs_and_translations.append(costs_and_translations)
-                os.remove(sfile.name)
-                os.remove(tfile.name)
-                files_removed = True
-        # If the files didn't end with a newline, marian wasn't run for the last set of sentences
-        if not files_removed:
+def training_file_generator(source, target):
+    begin_sentences = True
+    contains_sentences = False
+    for sline, tline in zip(source, target):
+        if begin_sentences:
+            sfile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False)
+            tfile = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False)
+            eprint(f"Created temp files for training data: {sfile.name}, {tfile.name}")
+            begin_sentences = False
+            contains_sentences = False
+
+        if sline != "\n" or tline != "\n":
+            eprint("NOOOOOOOO")
+            eprint(sline)
+            eprint(tline)
+            sfile.write(sline)
+            tfile.write(tline)
+            contains_sentences = True
+        else:
             sfile.close()
             tfile.close()
             eprint("AAAAAA")
-            input_line = inputs.readline()
-            if has_context:
+            yield (contains_sentences, sfile, tfile)
+            begin_sentences = True
+
+    # The last non-empty set of sentences can not be delimited with an empty line
+    if contains_sentences:
+        sfile.close()
+        tfile.close()
+        eprint("AAAAAA")
+        yield (contains_sentences, sfile, tfile)
+
+def iterate_over_inputs(config, source, target, inputs):
+    all_costs_and_translations = []
+    try:
+        for input_line, (contains_sentences, sfile, tfile) in zip(inputs, training_file_generator(source, target)):
+            if contains_sentences:
                 costs_and_translations = run_marian(sfile.name, tfile.name, input_line, config)
             else:
                 eprint("No context provided, skipping training")

From b098f9e4a5d9b61b82c68b326b888cfc0073ce4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Thu, 21 Oct 2021 15:53:06 +0300
Subject: [PATCH 03/13] Fix empty cost list insertion when not training

---
 tests/_self-adaptive/gen-costs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/_self-adaptive/gen-costs.py b/tests/_self-adaptive/gen-costs.py
index 54a09130..91b209e9 100755
--- a/tests/_self-adaptive/gen-costs.py
+++ b/tests/_self-adaptive/gen-costs.py
@@ -87,7 +87,7 @@ def iterate_over_inputs(config, source, target, inputs):
             else:
                 eprint("No context provided, skipping training")
                 translations = translate_marian(input_line, config)
-                costs_and_translations = [([], t) for t in translations]
+                costs_and_translations = ([], translations)
             all_costs_and_translations.append(costs_and_translations)
             os.remove(sfile.name)
             os.remove(tfile.name)

From 97d59f2d3cfd850db868682b85e8a0e4b4cc8982 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Thu, 21 Oct 2021 15:53:47 +0300
Subject: [PATCH 04/13] Generate outputs for the other test kinds

---
 .../regenerate-expected-outputs.sh            | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/tests/_self-adaptive/regenerate-expected-outputs.sh b/tests/_self-adaptive/regenerate-expected-outputs.sh
index de0753e1..ca48c0a6 100755
--- a/tests/_self-adaptive/regenerate-expected-outputs.sh
+++ b/tests/_self-adaptive/regenerate-expected-outputs.sh
@@ -6,6 +6,7 @@ MRT_TOOLS=../../tools
 
 MODELS=$MRT_MODELS/wmt16_systems/en-de
 
+echo "### Generating files for the oracle tests"
 ./gen-costs.py \
     -t ubuntu.oracle_2s1e.{src,ref} \
     -m $MODELS/model.npz \
@@ -19,17 +20,24 @@ MODELS=$MRT_MODELS/wmt16_systems/en-de
 # Generate BLEU
 $MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl -lc ubuntu.ref < oracle.expected > oracle.bleu.expected
 
-
-
+echo "\n\n### Generating files for the partial context tests"
 ./gen-costs.py \
-    -t ubuntu.oracle_2s1e.{src,ref} \
+    -t ubuntu.contextpart.{src,ref} \
     -m $MODELS/model.npz \
     -v $MODELS/vocab.{en,de}.json \
     -e 1 \
     --marian-dir ~/prog/cpp/marian-adaptive/build/ \
     -i ubuntu.src \
-    --output-costs costs.expected \
-    --output-transl oracle.expected
+    --output-costs contextpart.costs.expected \
+    --output-transl contextpart.expected
 
-# Generate BLEU
-$MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl -lc ubuntu.ref < oracle.expected2 > oracle.bleu.expected2
+
+echo "\n\n### Generating files for the no context tests"
+./gen-costs.py \
+    -t ubuntu.nocontext.{src,ref} \
+    -m $MODELS/model.npz \
+    -v $MODELS/vocab.{en,de}.json \
+    -e 1 \
+    --marian-dir ~/prog/cpp/marian-adaptive/build/ \
+    -i ubuntu.nocontext.src \
+    --output-transl nocontext.expected

From 6c90942b8b7b63725ef2b20adc0797fe05bba614 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Fri, 22 Oct 2021 15:15:52 +0300
Subject: [PATCH 05/13] Don't swallow the last newline for translations; fix no
 context tests

---
 tests/_self-adaptive/gen-costs.py                   | 5 +++--
 tests/_self-adaptive/regenerate-expected-outputs.sh | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/_self-adaptive/gen-costs.py b/tests/_self-adaptive/gen-costs.py
index 91b209e9..22f56a1d 100755
--- a/tests/_self-adaptive/gen-costs.py
+++ b/tests/_self-adaptive/gen-costs.py
@@ -147,6 +147,7 @@ def translate_marian(input_line, config):
                       '--type', 'amun', '-v', c.vocab1, '-v', c.vocab2,
                       '--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2),
                       '--dim-emb', '500'], input=input_line, capture_output=True, text=True)
+    eprint(f"Translate input: {input_line}")
     eprint("STDOUT:")
     eprint(process.stdout)
     eprint("STDERR:")
@@ -161,10 +162,10 @@ def output_costs_and_translations(costs_and_translations, output_costs, output_t
 
     if output_costs is not None:
         all_costs = [cost for costs, _ in costs_and_translations for cost in costs]
-        output_costs.write('\n'.join(all_costs))
+        output_costs.writelines(map(lambda c: c + '\n', all_costs))
     if output_transl is not None:
         all_translations = [translation for _, translations in costs_and_translations for translation in translations]
-        output_transl.write('\n'.join(all_translations))
+        output_transl.writelines(map(lambda t: t + '\n', all_translations))
 
 
 if __name__ == "__main__":
diff --git a/tests/_self-adaptive/regenerate-expected-outputs.sh b/tests/_self-adaptive/regenerate-expected-outputs.sh
index ca48c0a6..4223490c 100755
--- a/tests/_self-adaptive/regenerate-expected-outputs.sh
+++ b/tests/_self-adaptive/regenerate-expected-outputs.sh
@@ -20,7 +20,7 @@ echo "### Generating files for the oracle tests"
 # Generate BLEU
 $MRT_TOOLS/moses-scripts/scripts/generic/multi-bleu.perl -lc ubuntu.ref < oracle.expected > oracle.bleu.expected
 
-echo "\n\n### Generating files for the partial context tests"
+echo -e "\n\n### Generating files for the partial context tests"
 ./gen-costs.py \
     -t ubuntu.contextpart.{src,ref} \
     -m $MODELS/model.npz \
@@ -32,12 +32,12 @@ echo "\n\n### Generating files for the partial context tests"
     --output-transl contextpart.expected
 
 
-echo "\n\n### Generating files for the no context tests"
+echo -e "\n\n### Generating files for the no context tests"
 ./gen-costs.py \
     -t ubuntu.nocontext.{src,ref} \
     -m $MODELS/model.npz \
     -v $MODELS/vocab.{en,de}.json \
     -e 1 \
     --marian-dir ~/prog/cpp/marian-adaptive/build/ \
-    -i ubuntu.nocontext.src \
+    -i ubuntu.src \
     --output-transl nocontext.expected

From 2efa3bc9dd9929bbfee5c9b2934d900fd365b979 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Fri, 22 Oct 2021 15:48:36 +0300
Subject: [PATCH 06/13] Change the optimizer to sgd to match adaptive marian's
 default

---
 tests/_self-adaptive/gen-costs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/_self-adaptive/gen-costs.py b/tests/_self-adaptive/gen-costs.py
index 22f56a1d..246f5dfd 100755
--- a/tests/_self-adaptive/gen-costs.py
+++ b/tests/_self-adaptive/gen-costs.py
@@ -120,7 +120,7 @@ def create_temp_model_copy(model):
 
 def train_marian(sfile, tfile, config):
     c = config
-    process = sp.run([f"{c.marian_dir}/marian", '-m', c.model, '--disp-freq', '1', '--type', 'amun', '-v',
+    process = sp.run([f"{c.marian_dir}/marian", '-m', c.model, '--disp-freq', '1', '--optimizer', 'sgd', '--type', 'amun', '-v',
                       c.vocab1, '-v', c.vocab2, '--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2), '--dim-emb', '500',
                       '--after-epochs', str(c.epochs), '--mini-batch', '1', '-t', sfile, tfile], capture_output=True, text=True)
     eprint("STDOUT:")

From ded1fb7fb018a7fc361ae7702a8c5bbee84fee3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Fri, 22 Oct 2021 15:52:06 +0300
Subject: [PATCH 07/13] Update the *.expected files

It's weird that the BLEU scores drop that much. I don't know why that's
happening or whether it's a problem.
---
 .../_self-adaptive/contextpart.costs.expected | 24 +++++-----
 tests/_self-adaptive/contextpart.expected     | 10 ++---
 tests/_self-adaptive/costs.expected           | 44 +++++++++----------
 tests/_self-adaptive/oracle.bleu.expected     |  2 +-
 tests/_self-adaptive/oracle.expected          | 18 ++++----
 5 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/tests/_self-adaptive/contextpart.costs.expected b/tests/_self-adaptive/contextpart.costs.expected
index 67c51dcc..30a7cedb 100644
--- a/tests/_self-adaptive/contextpart.costs.expected
+++ b/tests/_self-adaptive/contextpart.costs.expected
@@ -1,12 +1,12 @@
-22.19
-2.75
-6.56
-0.29
-47.07
-15.31
-4.52
-0.11
-24.44
-2.20
-28.06
-6.56
+2.21881247
+2.21806955
+0.46859190
+0.46825695
+2.76864076
+2.76791906
+0.56501639
+0.56451356
+2.71565461
+2.71488905
+3.11743832
+3.11609936
diff --git a/tests/_self-adaptive/contextpart.expected b/tests/_self-adaptive/contextpart.expected
index 4a222e8b..5d2b071d 100644
--- a/tests/_self-adaptive/contextpart.expected
+++ b/tests/_self-adaptive/contextpart.expected
@@ -1,11 +1,11 @@
 klicken und ziehen
-die linke Maustaste für einen Rechtsklick gedrückt halten .
-Sie können rechts@@ klicken , indem Sie die linke Maustaste gedrückt halten .
+drücken Sie die linke Maustaste und klicken Sie mit der rechten Maustaste .
+Sie können mit Rechtsklick die linke Maustaste gedrückt halten .
 wechseln Sie &@@ lt@@ ; gu@@ i &gt; Sim@@ ulated Secondary Click &@@ lt@@ ; / gu@@ i &gt; weiter .
-warum sollte ich meine E-Mail-@@ Konten oder sozialen Medien zu Ihrem Desktop hinzufügen ?
+warum fügen Sie Ihre E-Mail oder Social Media auf Ihren Desktop ?
 warum sollte ich ein Konto hinzufügen ?
-Anwendungen entfernen , die Sie nicht mehr benötigen
+entfernen Sie die Software , die Sie nicht mehr verwenden .
 prüfen Sie Ihre Sicherung
 die von Ihnen verwendete Software kann in der Regel recht schnell wieder hergestellt werden .
 zurück eure wichtigen Akten
-was ist die &quot; Super &quot; -Taste ?
+was ist der &quot; Super &quot; -@@ Schlüssel ?
diff --git a/tests/_self-adaptive/costs.expected b/tests/_self-adaptive/costs.expected
index 1c8cb87c..326568ae 100644
--- a/tests/_self-adaptive/costs.expected
+++ b/tests/_self-adaptive/costs.expected
@@ -1,22 +1,22 @@
-1.26
-0.01
-22.19
-2.75
-6.56
-0.29
-77.35
-52.49
-47.07
-15.31
-4.52
-0.11
-24.44
-2.20
-7.48
-0.13
-38.95
-8.38
-13.98
-0.36
-28.06
-6.56
+0.31558657
+0.31486344
+2.21881247
+2.21806955
+0.46859190
+0.46825695
+4.07114267
+4.07016516
+2.76864076
+2.76791906
+0.56501639
+0.56451356
+2.71565461
+2.71488905
+1.87033248
+1.86898255
+1.77039230
+1.76994097
+2.79638195
+2.79453039
+3.11743832
+3.11609936
diff --git a/tests/_self-adaptive/oracle.bleu.expected b/tests/_self-adaptive/oracle.bleu.expected
index 104e7681..71a2740c 100644
--- a/tests/_self-adaptive/oracle.bleu.expected
+++ b/tests/_self-adaptive/oracle.bleu.expected
@@ -1 +1 @@
-BLEU = 79.14, 96.0/90.9/85.7/81.8 (BP=0.895, ratio=0.900, hyp_len=99, ref_len=110)
+BLEU = 28.81, 51.4/33.7/24.1/17.1 (BP=0.991, ratio=0.991, hyp_len=109, ref_len=110)
diff --git a/tests/_self-adaptive/oracle.expected b/tests/_self-adaptive/oracle.expected
index 8ca6bdbc..5d2b071d 100644
--- a/tests/_self-adaptive/oracle.expected
+++ b/tests/_self-adaptive/oracle.expected
@@ -1,11 +1,11 @@
 klicken und ziehen
-die linke Maustaste für einen Rechtsklick gedrückt halten .
-Sie können rechts@@ klicken , indem Sie die linke Maustaste gedrückt halten .
-aktivieren Sie &@@ lt@@ ; gu@@ i &gt; .
-warum sollte ich meine E-Mail-@@ Konten oder sozialen Medien zu Ihrem Desktop hinzufügen ?
+drücken Sie die linke Maustaste und klicken Sie mit der rechten Maustaste .
+Sie können mit Rechtsklick die linke Maustaste gedrückt halten .
+wechseln Sie &@@ lt@@ ; gu@@ i &gt; Sim@@ ulated Secondary Click &@@ lt@@ ; / gu@@ i &gt; weiter .
+warum fügen Sie Ihre E-Mail oder Social Media auf Ihren Desktop ?
 warum sollte ich ein Konto hinzufügen ?
-Anwendungen entfernen , die Sie nicht mehr benötigen
-ihre Sicherung überprüfen
-die Anwendungen , die Sie nutzen , können durch Neu@@ installation nach einem schwerwiegenden Rechner@@ problem meist schnell wiederhergestellt werden .
-sichern Ihrer wichtigen Dateien
-was ist die &quot; Super &quot; -Taste ?
+entfernen Sie die Software , die Sie nicht mehr verwenden .
+prüfen Sie Ihre Sicherung
+die von Ihnen verwendete Software kann in der Regel recht schnell wieder hergestellt werden .
+zurück eure wichtigen Akten
+was ist der &quot; Super &quot; -@@ Schlüssel ?

From b20cbfd599a88f70b2f9dd717ebb44c01575f44b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Tue, 26 Oct 2021 15:56:44 +0300
Subject: [PATCH 08/13] Update server test expected outputs

---
 tests/_self-adaptive/test_server_mode.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/_self-adaptive/test_server_mode.sh b/tests/_self-adaptive/test_server_mode.sh
index 3c79f62e..4a756619 100644
--- a/tests/_self-adaptive/test_server_mode.sh
+++ b/tests/_self-adaptive/test_server_mode.sh
@@ -19,11 +19,11 @@ kill $SERVER_PID
 
 test -e server.log
 grep -q "listening on port 8766" server.log
-grep -q '{"output":"dies ist ein Beispiel' server.log
+grep -q '{"output":"das ist ein Beispiel' server.log
 grep -q "Ep. 2 : Up. 4 : Sen. 2" server.log
 grep -q "Ep. 2 : Up. 2 : Sen. 1" server.log 
 grep -q "No context" server.log
-grep -q 'dies ist ein Beispiel' text.out
+grep -q 'das ist ein Beispiel' text.out
 
 # Exit with success code
 exit 0

From e92dafc31dc591b2ff788b33a5fad5670c73d829 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Mon, 8 Nov 2021 14:08:55 +0200
Subject: [PATCH 09/13] Make debug outputs saner

---
 tests/_self-adaptive/gen-costs.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/_self-adaptive/gen-costs.py b/tests/_self-adaptive/gen-costs.py
index 246f5dfd..9a8ac77f 100755
--- a/tests/_self-adaptive/gen-costs.py
+++ b/tests/_self-adaptive/gen-costs.py
@@ -58,16 +58,14 @@ def training_file_generator(source, target):
             contains_sentences = False
 
         if sline != "\n" or tline != "\n":
-            eprint("NOOOOOOOO")
-            eprint(sline)
-            eprint(tline)
+            eprint(sline.rstrip())
+            eprint(tline.rstrip())
             sfile.write(sline)
             tfile.write(tline)
             contains_sentences = True
         else:
             sfile.close()
             tfile.close()
-            eprint("AAAAAA")
             yield (contains_sentences, sfile, tfile)
             begin_sentences = True
 
@@ -75,7 +73,6 @@ def training_file_generator(source, target):
     if contains_sentences:
         sfile.close()
         tfile.close()
-        eprint("AAAAAA")
         yield (contains_sentences, sfile, tfile)
 
 def iterate_over_inputs(config, source, target, inputs):

From 6e0f4aac9f9c3abe866ce27a812e89ccc772f7ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Mon, 8 Nov 2021 15:53:55 +0200
Subject: [PATCH 10/13] Make dimensions and model type configurable

---
 tests/_self-adaptive/gen-costs.py             | 41 ++++++++++++++-----
 .../regenerate-expected-outputs.sh            |  9 ++++
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/tests/_self-adaptive/gen-costs.py b/tests/_self-adaptive/gen-costs.py
index 9a8ac77f..0605e544 100755
--- a/tests/_self-adaptive/gen-costs.py
+++ b/tests/_self-adaptive/gen-costs.py
@@ -15,13 +15,18 @@
 class MarianTrainConfig:
     marian_dir: str
     model: str
+    model_type: str
     vocab1: str
     vocab2: str
     dim_vocab1: int
     dim_vocab2: int
+    dim_emb: int
     epochs: int
 
 
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
 def main():
     parser = ag.ArgumentParser()
     parser.add_argument('-t', '--train-sets',
@@ -29,7 +34,10 @@ def main():
     parser.add_argument('-i', '--input', type=ag.FileType('r', encoding='utf-8'), required=True)
     parser.add_argument('-m', '--model', required=True)
     parser.add_argument('-v', '--vocabs', nargs=2, required=True)
+    parser.add_argument('--dim-vocabs', nargs=2, type=int, required=False)
+    parser.add_argument('--dim-emb', type=int, required=False)
     parser.add_argument('-e', '--epochs', type=int, required=True)
+    parser.add_argument('--type', required=True)
     parser.add_argument('--marian-dir', required=True)
     parser.add_argument('--output-costs', type=ag.FileType('w', encoding='utf-8'))
     parser.add_argument('--output-transl', type=ag.FileType('w', encoding='utf-8'))
@@ -38,14 +46,14 @@ def main():
 
     [sfile, tfile] = args.train_sets
     [vocab1, vocab2] = args.vocabs
-    config = MarianTrainConfig(args.marian_dir, args.model, vocab1, vocab2, 85000, 85000, args.epochs)
+    [dvocab1, dvocab2] = args.dim_vocabs if args.dim_vocabs is not None else [None, None]
+    config = MarianTrainConfig(args.marian_dir, args.model, args.type, vocab1,
+                               vocab2, dvocab1, dvocab2, args.dim_emb, args.epochs)  # one size per vocab
+    eprint(config)
     costs_and_translations = iterate_over_inputs(config, sfile, tfile, args.input)
     output_costs_and_translations(costs_and_translations, args.output_costs, args.output_transl)
 
 
-def eprint(*args, **kwargs):
-    print(*args, file=sys.stderr, **kwargs)
-
 def training_file_generator(source, target):
     begin_sentences = True
     contains_sentences = False
@@ -117,9 +125,15 @@ def create_temp_model_copy(model):
 
 def train_marian(sfile, tfile, config):
     c = config
-    process = sp.run([f"{c.marian_dir}/marian", '-m', c.model, '--disp-freq', '1', '--optimizer', 'sgd', '--type', 'amun', '-v',
-                      c.vocab1, '-v', c.vocab2, '--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2), '--dim-emb', '500',
-                      '--after-epochs', str(c.epochs), '--mini-batch', '1', '-t', sfile, tfile], capture_output=True, text=True)
+
+    args = [f"{c.marian_dir}/marian", '-m', c.model, '--disp-freq', '1', '--optimizer', 'sgd', '--type', c.model_type, '-v',
+            c.vocab1, '-v', c.vocab2, '--after-epochs', str(c.epochs), '--mini-batch', '1', '-t', sfile, tfile]
+    if c.dim_emb is not None:
+        args += ['--dim-emb', str(c.dim_emb)]
+    if c.dim_vocab1 is not None and c.dim_vocab2 is not None:
+        args += ['--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2)]
+    process = sp.run(args, capture_output=True, text=True)
+
     eprint("STDOUT:")
     eprint(process.stdout)
     eprint("STDERR:")
@@ -140,10 +154,15 @@ def extract_costs(output_log):
 
 def translate_marian(input_line, config):
     c = config
-    process = sp.run([f"{c.marian_dir}/marian-decoder", '-m', c.model,
-                      '--type', 'amun', '-v', c.vocab1, '-v', c.vocab2,
-                      '--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2),
-                      '--dim-emb', '500'], input=input_line, capture_output=True, text=True)
+
+    args = [f"{c.marian_dir}/marian-decoder", '-m', c.model,
+            '--type', c.model_type, '-v', c.vocab1, '-v', c.vocab2]
+    if c.dim_emb is not None:
+        args += ['--dim-emb', str(c.dim_emb)]
+    if c.dim_vocab1 is not None and c.dim_vocab2 is not None:
+        args += ['--dim-vocabs', str(c.dim_vocab1), str(c.dim_vocab2)]
+    process = sp.run(args, input=input_line, capture_output=True, text=True)
+
     eprint(f"Translate input: {input_line}")
     eprint("STDOUT:")
     eprint(process.stdout)
diff --git a/tests/_self-adaptive/regenerate-expected-outputs.sh b/tests/_self-adaptive/regenerate-expected-outputs.sh
index 4223490c..362a74de 100755
--- a/tests/_self-adaptive/regenerate-expected-outputs.sh
+++ b/tests/_self-adaptive/regenerate-expected-outputs.sh
@@ -10,6 +10,9 @@ echo "### Generating files for the oracle tests"
 ./gen-costs.py \
     -t ubuntu.oracle_2s1e.{src,ref} \
     -m $MODELS/model.npz \
+    --type amun \
+    --dim-vocabs 85000 85000 \
+    --dim-emb 500 \
     -v $MODELS/vocab.{en,de}.json \
     -e 1 \
     --marian-dir ~/prog/cpp/marian-adaptive/build/ \
@@ -24,6 +27,9 @@ echo -e "\n\n### Generating files for the partial context tests"
 ./gen-costs.py \
     -t ubuntu.contextpart.{src,ref} \
     -m $MODELS/model.npz \
+    --type amun \
+    --dim-vocabs 85000 85000 \
+    --dim-emb 500 \
     -v $MODELS/vocab.{en,de}.json \
     -e 1 \
     --marian-dir ~/prog/cpp/marian-adaptive/build/ \
@@ -36,6 +42,9 @@ echo -e "\n\n### Generating files for the no context tests"
 ./gen-costs.py \
     -t ubuntu.nocontext.{src,ref} \
     -m $MODELS/model.npz \
+    --type amun \
+    --dim-vocabs 85000 85000 \
+    --dim-emb 500 \
     -v $MODELS/vocab.{en,de}.json \
     -e 1 \
     --marian-dir ~/prog/cpp/marian-adaptive/build/ \

From a8bdf85b6d0edfcec56b6de85025e65db2d909ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Mon, 8 Nov 2021 16:22:24 +0200
Subject: [PATCH 11/13] Add a self-adaptive transformer test

---
 .../regenerate-expected-outputs.sh            | 13 ++++++++++
 .../test_context_partial_transformer.sh       | 24 +++++++++++++++++++
 .../transformer.contextpart.costs.expected    | 12 ++++++++++
 .../transformer.contextpart.expected          | 11 +++++++++
 4 files changed, 60 insertions(+)
 create mode 100644 tests/_self-adaptive/test_context_partial_transformer.sh
 create mode 100644 tests/_self-adaptive/transformer.contextpart.costs.expected
 create mode 100644 tests/_self-adaptive/transformer.contextpart.expected

diff --git a/tests/_self-adaptive/regenerate-expected-outputs.sh b/tests/_self-adaptive/regenerate-expected-outputs.sh
index 362a74de..6b4b6dbc 100755
--- a/tests/_self-adaptive/regenerate-expected-outputs.sh
+++ b/tests/_self-adaptive/regenerate-expected-outputs.sh
@@ -50,3 +50,16 @@ echo -e "\n\n### Generating files for the no context tests"
     --marian-dir ~/prog/cpp/marian-adaptive/build/ \
     -i ubuntu.src \
     --output-transl nocontext.expected
+
+
+echo -e "\n\n### Generating files for the transformer partial context tests"
+./gen-costs.py \
+    -t ubuntu.contextpart.{src,ref} \
+    -m $MRT_MODELS/transformer/model.npz \
+    --type transformer \
+    -v $MRT_MODELS/transformer/vocab.ende.yml{,} \
+    -e 1 \
+    --marian-dir ~/prog/cpp/marian-adaptive/build/ \
+    -i ubuntu.src \
+    --output-costs transformer.contextpart.costs.expected \
+    --output-transl transformer.contextpart.expected
diff --git a/tests/_self-adaptive/test_context_partial_transformer.sh b/tests/_self-adaptive/test_context_partial_transformer.sh
new file mode 100644
index 00000000..b3d76607
--- /dev/null
+++ b/tests/_self-adaptive/test_context_partial_transformer.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Exit on error
+set -e
+
+# Remove the log written by a previous run of this test
+rm -f contextpart.transformer.log
+
+# Run Marian with the transformer model; translations go to stdout
+$MRT_MARIAN/marian-adaptive \
+  -m $MRT_MODELS/transformer/model.npz \
+  -v $MRT_MODELS/transformer/vocab.ende.yml -v $MRT_MODELS/transformer/vocab.ende.yml \
+  --after-epochs 1 \
+  -t ubuntu.contextpart.src ubuntu.contextpart.ref --log contextpart.transformer.log < ubuntu.src > contextpart.transformer.out
+
+# Check outputs against the transformer-specific expected translations
+$MRT_TOOLS/diff.sh contextpart.transformer.out transformer.contextpart.expected > contextpart.transformer.diff
+
+# Check costs extracted from this run's log against the transformer fixture
+cat contextpart.transformer.log | $MRT_TOOLS/extract-costs.sh > contextpart.costs.transformer.out
+$MRT_TOOLS/diff-nums.py -p 0.01 contextpart.costs.transformer.out transformer.contextpart.costs.expected -o contextpart.costs.transformer.diff
+
+# Exit with success code
+exit 0
diff --git a/tests/_self-adaptive/transformer.contextpart.costs.expected b/tests/_self-adaptive/transformer.contextpart.costs.expected
new file mode 100644
index 00000000..d2f8a912
--- /dev/null
+++ b/tests/_self-adaptive/transformer.contextpart.costs.expected
@@ -0,0 +1,12 @@
+5.41536570
+5.41065693
+4.29847670
+4.29496717
+4.33480740
+4.33143234
+0.43169200
+0.43012774
+2.70875025
+2.70587468
+3.08147693
+3.07468438
diff --git a/tests/_self-adaptive/transformer.contextpart.expected b/tests/_self-adaptive/transformer.contextpart.expected
new file mode 100644
index 00000000..41aaf88a
--- /dev/null
+++ b/tests/_self-adaptive/transformer.contextpart.expected
@@ -0,0 +1,11 @@
+Kli@@ cken und Japan@@ isch
+drücken und halten Sie die lin@@ ke Maustaste auf ¥ .
+Sie können die lin@@ ke Maustaste ge@@ drückt halten .
+Sch@@ alter = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+warum Ihre E @-@ Mail oder Social Media Ac@@ counts zu Ihrem Desktop hinzufügen ?
+warum sollte ich einen Account hinzufügen ?
+entfernen Sie Software , die Sie nicht mehr verwenden .
+überprüfen Sie Ihr Back@@ up
+die von Ihnen verwendete Software kann normalerweise sehr schnell nach einem ernsten Computer@@ problem durch Neu@@ installation wieder@@ hergestellt werden .
+sichern Sie Ihre wichtigen Dateien
+was ist der chinesische Schlüssel ?

From f95c51ce7aebbc438e0dbc6e2165c0822a840746 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Thu, 16 Dec 2021 16:13:51 +0200
Subject: [PATCH 12/13] Fix self-adaptive server mode tests after changes in
 Marian

---
 tests/_self-adaptive/test_server_mode.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/_self-adaptive/test_server_mode.sh b/tests/_self-adaptive/test_server_mode.sh
index 4a756619..68db8e37 100644
--- a/tests/_self-adaptive/test_server_mode.sh
+++ b/tests/_self-adaptive/test_server_mode.sh
@@ -9,7 +9,7 @@ clean_up() {
 trap clean_up EXIT
 
 # Test code goes here
-$MRT_MARIAN/marian-adaptive -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml -p 8766 > server.log 2>&1 &
+$MRT_MARIAN/marian-adaptive-server -c $MRT_MODELS/wmt16_systems/marian.en-de.scorer.yml -p 8766 > server.log 2>&1 &
 SERVER_PID=$!
 
 sleep 20
@@ -19,7 +19,7 @@ kill $SERVER_PID
 
 test -e server.log
 grep -q "listening on port 8766" server.log
-grep -q '{"output":"das ist ein Beispiel' server.log
+grep -q 'Best translation 0 : das ist ein Beispiel' server.log
 grep -q "Ep. 2 : Up. 4 : Sen. 2" server.log
 grep -q "Ep. 2 : Up. 2 : Sen. 1" server.log 
 grep -q "No context" server.log

From bc7efdcc35d5940da6915cacd13a1ba51906c839 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= <rihards.krislauks@tilde.lv>
Date: Tue, 1 Feb 2022 11:00:16 +0200
Subject: [PATCH 13/13] Regenerate expected outputs for self-adaptive tests
 with a known config

CMake command: cmake .. -DCMAKE_BUILD_TYPE=Debug -DCOMPILE_ADAPTIVE=ON -DCOMPILE_SERVER=ON
GCC version: 7.5.0
CUDA version: 10.1
GPU model: Quadro RTX 6000 (Turing)
---
 .../_self-adaptive/contextpart.costs.expected | 16 +++++-----
 tests/_self-adaptive/costs.expected           | 30 +++++++++----------
 .../transformer.contextpart.costs.expected    | 18 +++++------
 3 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/tests/_self-adaptive/contextpart.costs.expected b/tests/_self-adaptive/contextpart.costs.expected
index 30a7cedb..8c33473d 100644
--- a/tests/_self-adaptive/contextpart.costs.expected
+++ b/tests/_self-adaptive/contextpart.costs.expected
@@ -1,12 +1,12 @@
 2.21881247
 2.21806955
-0.46859190
-0.46825695
-2.76864076
-2.76791906
-0.56501639
+0.46859169
+0.46825710
+2.76864004
+2.76791930
+0.56501710
 0.56451356
-2.71565461
-2.71488905
-3.11743832
+2.71565413
+2.71488881
+3.11743879
 3.11609936
diff --git a/tests/_self-adaptive/costs.expected b/tests/_self-adaptive/costs.expected
index 326568ae..ac09ceab 100644
--- a/tests/_self-adaptive/costs.expected
+++ b/tests/_self-adaptive/costs.expected
@@ -1,22 +1,22 @@
-0.31558657
-0.31486344
+0.31558633
+0.31486320
 2.21881247
 2.21806955
-0.46859190
-0.46825695
+0.46859169
+0.46825710
 4.07114267
-4.07016516
-2.76864076
-2.76791906
-0.56501639
+4.07016468
+2.76864004
+2.76791930
+0.56501710
 0.56451356
-2.71565461
-2.71488905
-1.87033248
-1.86898255
+2.71565413
+2.71488881
+1.87033439
+1.86898220
 1.77039230
 1.76994097
-2.79638195
-2.79453039
-3.11743832
+2.79638267
+2.79453158
+3.11743879
 3.11609936
diff --git a/tests/_self-adaptive/transformer.contextpart.costs.expected b/tests/_self-adaptive/transformer.contextpart.costs.expected
index d2f8a912..eb54d76d 100644
--- a/tests/_self-adaptive/transformer.contextpart.costs.expected
+++ b/tests/_self-adaptive/transformer.contextpart.costs.expected
@@ -1,12 +1,12 @@
 5.41536570
-5.41065693
+5.41065788
 4.29847670
 4.29496717
-4.33480740
-4.33143234
-0.43169200
-0.43012774
-2.70875025
-2.70587468
-3.08147693
-3.07468438
+4.33481550
+4.33142281
+0.43169218
+0.43012768
+2.70875049
+2.70587540
+3.08147740
+3.07468462