より精度の高い、アクセント句差分の適用に関する提案 (VOICEVOX#1612)

Hiroshiba · Nov 10, 2023 · 16c6380 · 16c6380
1 parent a39b374
commit 16c6380
Show file tree

Hide file tree

Showing 2 changed files with 125 additions and 62 deletions.
diff --git a/src/store/audio.ts b/src/store/audio.ts
@@ -1,7 +1,6 @@
 import path from "path";
 import { v4 as uuidv4 } from "uuid";
 import Encoding from "encoding-japanese";
-import { diffArrays } from "diff";
 import { toRaw } from "vue";
 import { createUILockAction, withProgress } from "./ui";
 import {
@@ -28,6 +27,7 @@ import {
   DEFAULT_STYLE_NAME,
   formatCharacterStyleName,
   joinTextsInAccentPhrases,
+  TuningTranscription,
 } from "./utility";
 import { convertAudioQueryFromEditorToEngine } from "./proxy";
 import { createPartialStore } from "./vuex";
@@ -1974,66 +1974,12 @@ export const audioCommandStore = transformCommandStore(
               if (!state.experimentalSetting.shouldKeepTuningOnTextChange) {
                 newAccentPhrases = accentPhrases;
               } else {
-                /*
-                 * # 調整結果の保持の仕組み
-                 * 1. 新しいAccentPhraseと古いAccentPhraseのテキスト（モーラのカタカナを結合したもの）を比較する。読点は無視する。（diffからflatDiff）
-                 * 例えば、
-                 * 旧：[ズ ン ダ モ ン ノ] [チョ ウ ショ ク]
-                 * 新：[ズ ン ダ モ ン ノ] [ユ ウ ショ ク]
-                 * という場合、
-                 *   [ズ ン ダ モ ン ノ]
-                 * + [ユ ウ ショ ク]
-                 * - [チョ ウ ショ ク]
-                 * のような変更なしのdiff・追加のdiff・削除のdiffが得られる。
-                 *
-                 * 2. それぞれのdiffにインデックスを振る。（indexedDiff）
-                 * 3. diffのインデックスと古いAccentPhraseの対応表を作る。（indexToOldAccentPhrase）
-                 * 追加のdiffを抜くと古いAccentPhraseになるので、残ったAccentPhraseのIDを対応させる。
-                 *   [ズ ン ダ モ ン ノ] #0 -> query.accentPhrases[0]
-                 * + [ユ ウ ショ ク]     #1 -> （無視）
-                 * - [チョ ウ ショ ク]   #2 -> query.accentPhrases[1]
-                 *
-                 * 4. 新しいAccentPhraseの配列を作る。（newAccentPhrases）
-                 * 変更なしのdiffは上の対応表を使って古いAccentPhrase、追加のdiffは新しいAccentPhraseを使い、削除のdiffは無視する。
-                 *   [ズ ン ダ モ ン ノ] #0 -> query.accentPhrases[0]
-                 * + [ユ ウ ショ ク]     #1 -> accentPhrases[1]
-                 * - [チョ ウ ショ ク]   #2 -> （無視）
-                 */
-                const diff = diffArrays(
-                  query.accentPhrases.map(joinTextsInAccentPhrases),
-                  accentPhrases.map(joinTextsInAccentPhrases)
-                );
-                const flatDiff = diff.flatMap((d) =>
-                  d.value.map((v) => ({ ...d, value: v }))
-                );
-                const indexedDiff = flatDiff.map((d, i) => ({
-                  ...d,
-                  index: i,
-                }));
-                const indexToOldAccentPhrase = indexedDiff
-                  .filter((d) => !d.added)
-                  .reduce(
-                    (acc, d, i) => ({
-                      ...acc,
-                      [d.index]: toRaw(query.accentPhrases[i]),
-                    }),
-                    {} as { [index: number]: AccentPhrase }
-                  );
-                newAccentPhrases = indexedDiff
-                  .filter((d) => !d.removed)
-                  .map((d, i) => {
-                    const ap = structuredClone(
-                      indexToOldAccentPhrase[d.index] ?? accentPhrases[i]
-                    );
-                    if (accentPhrases[i].pauseMora !== undefined) {
-                      ap.pauseMora = accentPhrases[i].pauseMora;
-                    } else {
-                      delete ap.pauseMora;
-                    }
-                    ap.isInterrogative = accentPhrases[i].isInterrogative;
-
-                    return ap;
-                  });
+                const mergedDiff: AccentPhrase[] = new TuningTranscription(
+                  query.accentPhrases,
+                  accentPhrases
+                ).transcribe();
+
+                newAccentPhrases = mergedDiff;
               }
             }
             commit("COMMAND_CHANGE_AUDIO_TEXT", {

diff --git a/src/store/utility.ts b/src/store/utility.ts
@@ -1,7 +1,8 @@
 import path from "path";
 import { Platform } from "quasar";
+import { diffArrays } from "diff";
 import { ToolbarButtonTagType, isMac } from "@/type/preload";
-import { AccentPhrase } from "@/openapi";
+import { AccentPhrase, Mora } from "@/openapi";
 
 export const DEFAULT_STYLE_NAME = "ノーマル";
 
@@ -173,6 +174,122 @@ function skipMemoText(targettext: string): string {
   return resolvedText;
 }
 
+/**
+ * Carries manual tuning over from an old accent-phrase array to a new one.
+ * The mora texts of both arrays are diffed, and every mora the diff reports as
+ * unchanged is taken from the old array, so its tuned values are kept.
+ * Example: editing 「こんにちは」 into 「こんばんは」 reuses the old moras for the
+ * bracketed part of 「 [こん]ばん[は] 」; all other moras come from the new array.
+ * Phrase-level fields of the result always come from the new array.
+ */
+export class TuningTranscription {
+  beforeAccent: AccentPhrase[];
+  afterAccent: AccentPhrase[];
+
+  /**
+   * @param beforeAccent accent phrases before the text change (tuning source)
+   * @param afterAccent accent phrases generated for the changed text
+   */
+  constructor(beforeAccent: AccentPhrase[], afterAccent: AccentPhrase[]) {
+    // JSON round-trip = deep copy, so the caller's objects are never touched.
+    // NOTE(review): presumably preferred over structuredClone because the
+    // inputs may be reactive store objects - confirm against the caller.
+    this.beforeAccent = JSON.parse(JSON.stringify(beforeAccent));
+    this.afterAccent = JSON.parse(JSON.stringify(afterAccent));
+  }
+
+  /**
+   * Collects `element[key]` of every element into one array; array values
+   * are spread one level deep.
+   * NOTE: for array-valued keys the declared return type `T[K][]` is one
+   * level too deep; the signature is kept unchanged for compatibility.
+   */
+  createFlatArray<T, K extends keyof T>(collection: T[], key: K): T[K][] {
+    const result: T[K][] = [];
+    for (const element of collection) {
+      const value = element[key];
+      if (Array.isArray(value)) {
+        result.push(...value);
+      } else {
+        result.push(value);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Builds the mora patch: one slot per mora of the new text, holding a copy
+   * of the old mora where the diff found it unchanged and undefined elsewhere.
+   * <Example> (U = undefined)
+   *   old texts: [ "ズ", "ン", "ダ", "モ", "ン", "ナ", "ノ", "ダ" ]
+   *   new texts: [ "ボ", "ク", "ズ", "ン", "ダ", "ナ", "ノ", "デ", "ス" ]
+   *   patch:     [  U ,  U , "ズ", "ン", "ダ", "ナ", "ノ",  U ,  U  ]
+   * The strings in `patch` stand for whole mora objects ({text: "ズ"...}).
+   */
+  createDiffPatch(): (Mora | undefined)[] {
+    const beforeMoras = this.beforeAccent.flatMap((phrase) => phrase.moras);
+    const afterMoras = this.afterAccent.flatMap((phrase) => phrase.moras);
+    const changes = diffArrays(
+      beforeMoras.map((mora) => mora.text),
+      afterMoras.map((mora) => mora.text)
+    );
+    const moraPatch: (Mora | undefined)[] = [];
+    // Position in beforeMoras of the first mora not yet consumed by the diff.
+    let beforeIndex = 0;
+    for (const change of changes) {
+      // `count` is optional in the diff typings, so measure `value` instead.
+      const length = change.value.length;
+      if (change.removed) {
+        // Only in the old text: skip these moras.
+        beforeIndex += length;
+      } else if (change.added) {
+        // Only in the new text: nothing to reuse for these slots.
+        for (let i = 0; i < length; i++) {
+          moraPatch.push(undefined);
+        }
+      } else {
+        // In both texts: push copies so the patch never aliases beforeAccent.
+        const kept = beforeMoras.slice(beforeIndex, beforeIndex + length);
+        moraPatch.push(...structuredClone(kept));
+        beforeIndex += length;
+      }
+    }
+    return moraPatch;
+  }
+
+  /**
+   * Writes the reusable moras of `moraPatch` into a copy of the new phrases.
+   * Slot i of the patch belongs to the i-th mora of the new phrases counted
+   * across phrase boundaries; a slot is applied only if it is defined and its
+   * text equals the text of the mora it would replace.
+   * <Example> 「こんにちは」 -> 「こんばんは」
+   *   moraPatch = [ {text: "コ"...}, {text: "ン"...}, undefined      , undefined      , {text: "ハ"...} ]
+   *   new moras = [ {text: "コ"...}, {text: "ン"...}, {text: "バ"...}, {text: "ン"...}, {text: "ハ"...} ]
+   * Here slots 0, 1 and 4 replace the corresponding new moras.
+   * @param moraPatch patch as produced by createDiffPatch
+   * @returns the merged phrases; this.afterAccent itself is left unchanged
+   */
+  mergeAccentPhrases(moraPatch: (Mora | undefined)[]): AccentPhrase[] {
+    const after: AccentPhrase[] = structuredClone(this.afterAccent);
+    let moraPatchIndex = 0;
+    for (const phrase of after) {
+      for (let moraIndex = 0; moraIndex < phrase.moras.length; moraIndex++) {
+        const reused = moraPatch[moraPatchIndex];
+        moraPatchIndex++;
+        if (reused != null && reused.text === phrase.moras[moraIndex].text) {
+          phrase.moras[moraIndex] = reused;
+        }
+      }
+    }
+    return after;
+  }
+
+  /** Runs the whole transcription and returns the merged accent phrases. */
+  transcribe(): AccentPhrase[] {
+    return this.mergeAccentPhrases(this.createDiffPatch());
+  }
+}
+
 /**
  * ２つのAccentPhrasesのテキスト内容が異なるかどうかを判定
  */