From 6caf834117a46e9b931af8e140d8cc43b58951d0 Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Sat, 31 Aug 2024 22:21:30 +0900 Subject: [PATCH 1/4] =?UTF-8?q?Docs:=20=E3=83=8E=E3=83=BC=E3=83=88?= =?UTF-8?q?=E3=81=AE=E8=BF=BD=E5=8A=A0=E6=96=B9=E6=B3=95=E3=81=A8=E9=81=B8?= =?UTF-8?q?=E6=8A=9E=E7=8A=B6=E6=85=8B=E3=81=AE=E8=A7=A3=E9=99=A4=E6=96=B9?= =?UTF-8?q?=E6=B3=95=E3=82=92=E8=BF=BD=E8=A8=98=20(#2225)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Docs: ノートの追加方法と選択状態の解除方法を追記 * Apply suggestions from code review --- public/howtouse.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/public/howtouse.md b/public/howtouse.md index c407e21984..9ac56da6b1 100644 --- a/public/howtouse.md +++ b/public/howtouse.md @@ -302,6 +302,12 @@ VOICEVOX では、歌声合成機能がプロトタイプ版として提供さ ソング機能は鋭意制作中です。フィードバックをお待ちしています。 +### ノート(音符)の追加 + +ピアノロールをクリックすることで、その高さのノート(音符)を追加できます。 +ドラッグすることで、長さを指定しつつノートを追加することもできます。 +`Esc`キーでノートの選択状態を解除できます。 + ### 歌詞の入力 ノートをダブルクリックすることで歌詞を入力できます。複数の文字を入力すれば一括入力できます。 From d3904c495ad0af7f57cb8e57c4578728ad008e0f Mon Sep 17 00:00:00 2001 From: Sig <62321214+sigprogramming@users.noreply.github.com> Date: Wed, 4 Sep 2024 04:42:06 +0900 Subject: [PATCH 2/4] =?UTF-8?q?=E3=82=BD=E3=83=B3=E3=82=B0=EF=BC=9A?= =?UTF-8?q?=E3=83=95=E3=83=AC=E3=83=BC=E3=82=BA=E3=81=AE=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=83=80=E3=83=AA=E3=83=B3=E3=82=B0=E5=87=A6=E7=90=86=E3=82=92?= =?UTF-8?q?=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF=E3=83=AA=E3=83=B3?= =?UTF-8?q?=E3=82=B0=20(#2248)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * フレーズレンダラーを追加、リファクタリング * 修正、コメントを追加 * コメントを修正 * バグがあったので修正 * cloneをしていなかったので修正 * EditorFrameAudioQueryを使うようにした * editFrameRateをeditorFrameRateに変更 * PhraseRenderStageIdを上で定義して、Stageでそれを使うようにした * StageをBaseStageに変更 * externalDependenciesから取得するところを分割代入に変更 * phraseの一時変数を追加 * コメントを追加・修正 * Update src/sing/phraseRendering.ts Co-authored-by: Hiroshiba * Update src/sing/phraseRendering.ts Co-authored-by: Hiroshiba * EditorFrameAudioQueryを使うようにした --------- Co-authored-by: Hiroshiba --- src/components/Sing/ScoreSequencer.vue | 8 +- .../Sing/SequencerPhraseIndicator.vue | 9 +- src/components/Sing/SequencerPitch.vue | 26 +- src/sing/domain.ts | 61 +- src/sing/phraseRendering.ts | 793 ++++++++++++++++ src/store/singing.ts | 844 +++++++----------- src/store/type.ts | 134 +-- tests/unit/lib/selectPriorPhrase.spec.ts | 44 +- 8 files changed, 1232 insertions(+), 687 deletions(-) create mode 100644 src/sing/phraseRendering.ts diff --git a/src/components/Sing/ScoreSequencer.vue b/src/components/Sing/ScoreSequencer.vue index f8efa45c89..ec9bb4c12a 100644 --- a/src/components/Sing/ScoreSequencer.vue +++ b/src/components/Sing/ScoreSequencer.vue @@ -360,7 +360,7 @@ const phraseInfosInOtherTracks = computed(() => { const ctrlKey = useCommandOrControlKey(); const editTarget = computed(() => state.sequencerEditTarget); -const editFrameRate = computed(() => state.editFrameRate); +const editorFrameRate = computed(() => state.editorFrameRate); const scrollBarWidth = ref(12); const sequencerBody = ref(null); @@ -601,7 +601,7 @@ const previewDrawPitch = () => { if (previewPitchEdit.value.type !== "draw") { throw new Error("previewPitchEdit.value.type is not draw."); } - const frameRate = editFrameRate.value; + const frameRate = editorFrameRate.value; const cursorBaseX = (scrollX.value + cursorX.value) / zoomX.value; const cursorBaseY = (scrollY.value + cursorY.value) / zoomY.value; const cursorTicks = baseXToTick(cursorBaseX, tpqn.value); @@ -675,7 +675,7 @@ const previewErasePitch = () => { if (previewPitchEdit.value.type !== "erase") { throw new Error("previewPitchEdit.value.type is not erase."); } - const frameRate = editFrameRate.value; + const frameRate = editorFrameRate.value; const cursorBaseX = (scrollX.value + cursorX.value) / zoomX.value; const cursorTicks = baseXToTick(cursorBaseX, tpqn.value); const cursorSeconds = tickToSecond(cursorTicks, tempos.value, tpqn.value); @@ -827,7 +827,7 @@ const startPreview = (event: MouseEvent, mode: PreviewMode, note?: Note) => { } else if (editTarget.value === "PITCH") { // 編集ターゲットがピッチのときの処理 - const frameRate = editFrameRate.value; + const frameRate = editorFrameRate.value; const cursorTicks = baseXToTick(cursorBaseX, tpqn.value); const cursorSeconds = tickToSecond(cursorTicks, tempos.value, tpqn.value); const cursorFrame = Math.round(cursorSeconds * frameRate); diff --git a/src/components/Sing/SequencerPhraseIndicator.vue b/src/components/Sing/SequencerPhraseIndicator.vue index 85049ed190..4994188884 100644 --- a/src/components/Sing/SequencerPhraseIndicator.vue +++ b/src/components/Sing/SequencerPhraseIndicator.vue @@ -6,15 +6,16 @@ import { computed } from "vue"; import { useStore } from "@/store"; import { getOrThrow } from "@/helpers/mapHelper"; -import { PhraseSourceHash, PhraseState } from "@/store/type"; +import { PhraseKey, PhraseState } from "@/store/type"; const props = defineProps<{ - phraseKey: PhraseSourceHash; + phraseKey: PhraseKey; isInSelectedTrack: boolean; }>(); const store = useStore(); const classNames: Record = { + SINGER_IS_NOT_SET: "singer-is-not-set", WAITING_TO_BE_RENDERED: "waiting-to-be-rendered", NOW_RENDERING: "now-rendering", COULD_NOT_RENDER: "could-not-render", @@ -43,6 +44,10 @@ const className = computed(() => { } } +.singer-is-not-set { + visibility: hidden; +} + .waiting-to-be-rendered { @include tint-if-in-other-track( "background-color", diff --git a/src/components/Sing/SequencerPitch.vue b/src/components/Sing/SequencerPitch.vue index e03c86fd0e..a5d911cc1e 100644 --- a/src/components/Sing/SequencerPitch.vue +++ b/src/components/Sing/SequencerPitch.vue @@ -30,6 +30,7 @@ import { ExhaustiveError } from "@/type/utility"; import { createLogger } from "@/domain/frontend/log"; import { getLast } from "@/sing/utility"; import { getOrThrow } from "@/helpers/mapHelper"; +import { EditorFrameAudioQuery } from "@/store/type"; type PitchLine = { readonly color: Color; @@ -55,21 +56,24 @@ const pitchEditData = computed(() => { }); const previewPitchEdit = computed(() => props.previewPitchEdit); const selectedTrackId = computed(() => store.getters.SELECTED_TRACK_ID); -const editFrameRate = computed(() => store.state.editFrameRate); +const editorFrameRate = computed(() => store.state.editorFrameRate); const singingGuidesInSelectedTrack = computed(() => { - const singingGuides = []; + const singingGuides: { + query: EditorFrameAudioQuery; + startTime: number; + }[] = []; for (const phrase of store.state.phrases.values()) { if (phrase.trackId !== selectedTrackId.value) { continue; } - if (phrase.singingGuideKey == undefined) { + if (phrase.queryKey == undefined) { continue; } - const singingGuide = getOrThrow( - store.state.singingGuides, - phrase.singingGuideKey, - ); - singingGuides.push(singingGuide); + const phraseQuery = getOrThrow(store.state.phraseQueries, phrase.queryKey); + singingGuides.push({ + startTime: phrase.startTime, + query: phraseQuery, + }); } return singingGuides; }); @@ -259,13 +263,13 @@ const setPitchDataToPitchLine = async ( const generateOriginalPitchData = () => { const unvoicedPhonemes = UNVOICED_PHONEMES; - const frameRate = editFrameRate.value; // f0(元のピッチ)は編集フレームレートで表示する + const frameRate = editorFrameRate.value; // f0(元のピッチ)はエディターのフレームレートで表示する // 選択中のトラックで使われている歌い方のf0を結合してピッチデータを生成する const tempData = []; for (const singingGuide of singingGuidesInSelectedTrack.value) { // TODO: 補間を行うようにする - if (singingGuide.frameRate !== frameRate) { + if (singingGuide.query.frameRate !== frameRate) { throw new Error( "The frame rate between the singing guide and the edit does not match.", ); @@ -312,7 +316,7 @@ const generateOriginalPitchData = () => { }; const generatePitchEditData = () => { - const frameRate = editFrameRate.value; + const frameRate = editorFrameRate.value; const tempData = [...pitchEditData.value]; // プレビュー中のピッチ編集があれば、適用する diff --git a/src/sing/domain.ts b/src/sing/domain.ts index 6d84530b73..9acfef90c6 100644 --- a/src/sing/domain.ts +++ b/src/sing/domain.ts @@ -4,15 +4,11 @@ import { Note, Phrase, PhraseSource, - SingingGuide, - SingingGuideSource, - SingingVoiceSource, Tempo, TimeSignature, - phraseSourceHashSchema, + PhraseKey, Track, - singingGuideSourceHashSchema, - singingVoiceSourceHashSchema, + EditorFrameAudioQuery, } from "@/store/type"; import { FramePhoneme } from "@/openapi"; import { TrackId } from "@/type/preload"; @@ -297,7 +293,7 @@ export const DEFAULT_BEAT_TYPE = 4; export const SEQUENCER_MIN_NUM_MEASURES = 32; // マルチエンジン対応のために将来的に廃止予定で、利用は非推奨 -export const DEPRECATED_DEFAULT_EDIT_FRAME_RATE = 93.75; +export const DEPRECATED_DEFAULT_EDITOR_FRAME_RATE = 93.75; export const VALUE_INDICATING_NO_DATA = -1; @@ -379,23 +375,9 @@ export function isValidPitchEditData(pitchEditData: number[]) { ); } -export const calculatePhraseSourceHash = async (phraseSource: PhraseSource) => { +export const calculatePhraseKey = async (phraseSource: PhraseSource) => { const hash = await calculateHash(phraseSource); - return phraseSourceHashSchema.parse(hash); -}; - -export const calculateSingingGuideSourceHash = async ( - singingGuideSource: SingingGuideSource, -) => { - const hash = await calculateHash(singingGuideSource); - return singingGuideSourceHashSchema.parse(hash); -}; - -export const calculateSingingVoiceSourceHash = async ( - singingVoiceSource: SingingVoiceSource, -) => { - const hash = await calculateHash(singingVoiceSource); - return singingVoiceSourceHashSchema.parse(hash); + return PhraseKey(hash); }; export function getStartTicksOfPhrase(phrase: Phrase) { @@ -469,20 +451,21 @@ export function convertToFramePhonemes(phonemes: FramePhoneme[]) { } export function applyPitchEdit( - singingGuide: SingingGuide, + phraseQuery: EditorFrameAudioQuery, + phraseStartTime: number, pitchEditData: number[], - editFrameRate: number, + editorFrameRate: number, ) { - // 歌い方のフレームレートと編集フレームレートが一致しない場合はエラー + // フレーズのクエリのフレームレートとエディターのフレームレートが一致しない場合はエラー // TODO: 補間するようにする - if (singingGuide.frameRate !== editFrameRate) { + if (phraseQuery.frameRate !== editorFrameRate) { throw new Error( - "The frame rate between the singing guide and the edit data does not match.", + "The frame rate between the phrase query and the editor does not match.", ); } const unvoicedPhonemes = UNVOICED_PHONEMES; - const f0 = singingGuide.query.f0; - const phonemes = singingGuide.query.phonemes; + const f0 = phraseQuery.f0; + const phonemes = phraseQuery.phonemes; // 各フレームの音素の配列を生成する const framePhonemes = convertToFramePhonemes(phonemes); @@ -490,21 +473,21 @@ export function applyPitchEdit( throw new Error("f0.length and framePhonemes.length do not match."); } - // 歌い方の開始フレームと終了フレームを計算する - const singingGuideFrameLength = f0.length; - const singingGuideStartFrame = Math.round( - singingGuide.startTime * singingGuide.frameRate, + // フレーズのクエリの開始フレームと終了フレームを計算する + const phraseQueryFrameLength = f0.length; + const phraseQueryStartFrame = Math.round( + phraseStartTime * phraseQuery.frameRate, ); - const singingGuideEndFrame = singingGuideStartFrame + singingGuideFrameLength; + const phraseQueryEndFrame = phraseQueryStartFrame + phraseQueryFrameLength; // ピッチ編集をf0に適用する - const startFrame = Math.max(0, singingGuideStartFrame); - const endFrame = Math.min(pitchEditData.length, singingGuideEndFrame); + const startFrame = Math.max(0, phraseQueryStartFrame); + const endFrame = Math.min(pitchEditData.length, phraseQueryEndFrame); for (let i = startFrame; i < endFrame; i++) { - const phoneme = framePhonemes[i - singingGuideStartFrame]; + const phoneme = framePhonemes[i - phraseQueryStartFrame]; const voiced = !unvoicedPhonemes.includes(phoneme); if (voiced && pitchEditData[i] !== VALUE_INDICATING_NO_DATA) { - f0[i - singingGuideStartFrame] = pitchEditData[i]; + f0[i - phraseQueryStartFrame] = pitchEditData[i]; } } } diff --git a/src/sing/phraseRendering.ts b/src/sing/phraseRendering.ts new file mode 100644 index 0000000000..dd4aba5545 --- /dev/null +++ b/src/sing/phraseRendering.ts @@ -0,0 +1,793 @@ +/** + * フレーズごとに音声合成するフレーズレンダラーと、それに必要な処理。 + * レンダリングが必要かどうかの判定やキャッシュの作成も行う。 + */ + +import { + Note, + PhraseKey, + Singer, + SingingVoice, + SingingVoiceKey, + Tempo, + Track, + SingingVolumeKey, + SingingVolume, + EditorFrameAudioQueryKey, + EditorFrameAudioQuery, +} from "@/store/type"; +import { FramePhoneme, Note as NoteForRequestToEngine } from "@/openapi"; +import { applyPitchEdit, decibelToLinear, tickToSecond } from "@/sing/domain"; +import { calculateHash, linearInterpolation } from "@/sing/utility"; +import { EngineId, StyleId, TrackId } from "@/type/preload"; +import { createLogger } from "@/domain/frontend/log"; +import { cloneWithUnwrapProxy } from "@/helpers/cloneWithUnwrapProxy"; +import { getOrThrow } from "@/helpers/mapHelper"; + +const logger = createLogger("sing/phraseRendering"); + +/** + * リクエスト用のノーツ(と休符)を作成する。 + */ +const createNotesForRequestToEngine = ( + firstRestDuration: number, + lastRestDurationSeconds: number, + notes: Note[], + tempos: Tempo[], + tpqn: number, + frameRate: number, +) => { + const notesForRequestToEngine: NoteForRequestToEngine[] = []; + + // 先頭の休符を変換 + const firstRestStartSeconds = tickToSecond( + notes[0].position - firstRestDuration, + tempos, + tpqn, + ); + const firstRestStartFrame = Math.round(firstRestStartSeconds * frameRate); + const firstRestEndSeconds = tickToSecond(notes[0].position, tempos, tpqn); + const firstRestEndFrame = Math.round(firstRestEndSeconds * frameRate); + notesForRequestToEngine.push({ + key: undefined, + frameLength: firstRestEndFrame - firstRestStartFrame, + lyric: "", + }); + + // ノートを変換 + for (const note of notes) { + const noteOnSeconds = tickToSecond(note.position, tempos, tpqn); + const noteOnFrame = Math.round(noteOnSeconds * frameRate); + const noteOffSeconds = tickToSecond( + note.position + note.duration, + tempos, + tpqn, + ); + const noteOffFrame = Math.round(noteOffSeconds * frameRate); + notesForRequestToEngine.push({ + key: note.noteNumber, + frameLength: noteOffFrame - noteOnFrame, + lyric: note.lyric, + }); + } + + // 末尾に休符を追加 + const lastRestFrameLength = Math.round(lastRestDurationSeconds * frameRate); + notesForRequestToEngine.push({ + key: undefined, + frameLength: lastRestFrameLength, + lyric: "", + }); + + // frameLengthが1以上になるようにする + for (let i = 0; i < notesForRequestToEngine.length; i++) { + const frameLength = notesForRequestToEngine[i].frameLength; + const frameToShift = Math.max(0, 1 - frameLength); + notesForRequestToEngine[i].frameLength += frameToShift; + if (i < notesForRequestToEngine.length - 1) { + notesForRequestToEngine[i + 1].frameLength -= frameToShift; + } + } + + return notesForRequestToEngine; +}; + +const shiftKeyOfNotes = (notes: NoteForRequestToEngine[], keyShift: number) => { + for (const note of notes) { + if (note.key != undefined) { + note.key += keyShift; + } + } +}; + +const getPhonemes = (query: EditorFrameAudioQuery) => { + return query.phonemes.map((value) => value.phoneme).join(" "); +}; + +const shiftPitch = (f0: number[], pitchShift: number) => { + for (let i = 0; i < f0.length; i++) { + f0[i] *= Math.pow(2, pitchShift / 12); + } +}; + +const shiftVolume = (volume: number[], volumeShift: number) => { + for (let i = 0; i < volume.length; i++) { + volume[i] *= decibelToLinear(volumeShift); + } +}; + +/** + * 末尾のpauの区間のvolumeを0にする。(歌とpauの呼吸音が重ならないようにする) + * fadeOutDurationSecondsが0の場合は即座にvolumeを0にする。 + */ +const muteLastPauSection = ( + volume: number[], + phonemes: FramePhoneme[], + frameRate: number, + fadeOutDurationSeconds: number, +) => { + const lastPhoneme = phonemes.at(-1); + if (lastPhoneme == undefined || lastPhoneme.phoneme !== "pau") { + throw new Error("No pau exists at the end."); + } + + let lastPauStartFrame = 0; + for (let i = 0; i < phonemes.length - 1; i++) { + lastPauStartFrame += phonemes[i].frameLength; + } + + const lastPauFrameLength = lastPhoneme.frameLength; + let fadeOutFrameLength = Math.round(fadeOutDurationSeconds * frameRate); + fadeOutFrameLength = Math.max(0, fadeOutFrameLength); + fadeOutFrameLength = Math.min(lastPauFrameLength, fadeOutFrameLength); + + // フェードアウト処理を行う + if (fadeOutFrameLength === 1) { + volume[lastPauStartFrame] *= 0.5; + } else { + for (let i = 0; i < fadeOutFrameLength; i++) { + volume[lastPauStartFrame + i] *= linearInterpolation( + 0, + 1, + fadeOutFrameLength - 1, + 0, + i, + ); + } + } + // 音量を0にする + for (let i = fadeOutFrameLength; i < lastPauFrameLength; i++) { + volume[lastPauStartFrame + i] = 0; + } +}; + +const singingTeacherStyleId = StyleId(6000); // TODO: 設定できるようにする +const lastRestDurationSeconds = 0.5; // TODO: 設定できるようにする +const fadeOutDurationSeconds = 0.15; // TODO: 設定できるようにする + +/** + * フレーズレンダリングに必要なデータのスナップショット + */ +type Snapshot = Readonly<{ + tpqn: number; + tempos: Tempo[]; + tracks: Map; + engineFrameRates: Map; + editorFrameRate: number; +}>; + +/** + * フレーズ + */ +type Phrase = Readonly<{ + firstRestDuration: number; + notes: Note[]; + startTime: number; + queryKey: { + get: () => EditorFrameAudioQueryKey | undefined; + set: (value: EditorFrameAudioQueryKey | undefined) => void; + }; + singingVolumeKey: { + get: () => SingingVolumeKey | undefined; + set: (value: SingingVolumeKey | undefined) => void; + }; + singingVoiceKey: { + get: () => SingingVoiceKey | undefined; + set: (value: SingingVoiceKey | undefined) => void; + }; +}>; + +/** + * フレーズレンダリングで必要となる外部のキャッシュや関数 + */ +type ExternalDependencies = Readonly<{ + queryCache: Map; + singingVolumeCache: Map; + singingVoiceCache: Map; + + phrases: { + get: (phraseKey: PhraseKey) => Phrase; + }; + phraseQueries: { + get: (queryKey: EditorFrameAudioQueryKey) => EditorFrameAudioQuery; + set: ( + queryKey: EditorFrameAudioQueryKey, + query: EditorFrameAudioQuery, + ) => void; + delete: (queryKey: EditorFrameAudioQueryKey) => void; + }; + phraseSingingVolumes: { + get: (singingVolumeKey: SingingVolumeKey) => SingingVolume; + set: ( + singingVolumeKey: SingingVolumeKey, + singingVolume: SingingVolume, + ) => void; + delete: (singingVolumeKey: SingingVolumeKey) => void; + }; + phraseSingingVoices: { + set: (singingVoiceKey: SingingVoiceKey, singingVoice: SingingVoice) => void; + delete: (singingVoiceKey: SingingVoiceKey) => void; + }; + + fetchQuery: ( + engineId: EngineId, + engineFrameRate: number, + notes: NoteForRequestToEngine[], + ) => Promise; + fetchSingFrameVolume: ( + notes: NoteForRequestToEngine[], + query: EditorFrameAudioQuery, + engineId: EngineId, + styleId: StyleId, + ) => Promise; + synthesizeSingingVoice: ( + singer: Singer, + query: EditorFrameAudioQuery, + ) => Promise; +}>; + +/** + * フレーズレンダリングのコンテキスト + */ +type Context = Readonly<{ + snapshot: Snapshot; + trackId: TrackId; + phraseKey: PhraseKey; + externalDependencies: ExternalDependencies; +}>; + +export type PhraseRenderStageId = + | "queryGeneration" + | "singingVolumeGeneration" + | "singingVoiceSynthesis"; + +/** + * フレーズレンダリングのステージのインターフェイス。 + * フレーズレンダラー内で順に実行される。 + */ +type BaseStage = Readonly<{ + id: PhraseRenderStageId; + + /** + * このステージが実行されるべきかを判定する。 + * @param context コンテキスト + * @returns 実行が必要かどうかのブール値 + */ + shouldBeExecuted: (context: Context) => Promise; + + /** + * 前回の処理結果を削除する。 + * @param context コンテキスト + */ + deleteExecutionResult: (context: Context) => void; + + /** + * ステージの処理を実行する。 + * @param context コンテキスト + */ + execute: (context: Context) => Promise; +}>; + +// クエリ生成ステージ + +/** + * クエリの生成に必要なデータ + */ +type QuerySource = Readonly<{ + engineId: EngineId; + engineFrameRate: number; + tpqn: number; + tempos: Tempo[]; + firstRestDuration: number; + notes: Note[]; + keyRangeAdjustment: number; +}>; + +const generateQuerySource = (context: Context): QuerySource => { + const { phrases } = context.externalDependencies; + + const track = getOrThrow(context.snapshot.tracks, context.trackId); + if (track.singer == undefined) { + throw new Error("track.singer is undefined."); + } + const engineFrameRate = getOrThrow( + context.snapshot.engineFrameRates, + track.singer.engineId, + ); + const phrase = phrases.get(context.phraseKey); + return { + engineId: track.singer.engineId, + engineFrameRate, + tpqn: context.snapshot.tpqn, + tempos: context.snapshot.tempos, + firstRestDuration: phrase.firstRestDuration, + notes: phrase.notes, + keyRangeAdjustment: track.keyRangeAdjustment, + }; +}; + +const calculateQueryKey = async (querySource: QuerySource) => { + const hash = await calculateHash(querySource); + return EditorFrameAudioQueryKey(hash); +}; + +const generateQuery = async ( + querySource: QuerySource, + externalDependencies: ExternalDependencies, +): Promise => { + const notesForRequestToEngine = createNotesForRequestToEngine( + querySource.firstRestDuration, + lastRestDurationSeconds, + querySource.notes, + querySource.tempos, + querySource.tpqn, + querySource.engineFrameRate, + ); + + shiftKeyOfNotes(notesForRequestToEngine, -querySource.keyRangeAdjustment); + + const query = await externalDependencies.fetchQuery( + querySource.engineId, + querySource.engineFrameRate, + notesForRequestToEngine, + ); + + shiftPitch(query.f0, querySource.keyRangeAdjustment); + return query; +}; + +const queryGenerationStage: BaseStage = { + id: "queryGeneration", + shouldBeExecuted: async (context: Context) => { + const { phrases } = context.externalDependencies; + + const track = getOrThrow(context.snapshot.tracks, context.trackId); + if (track.singer == undefined) { + return false; + } + const phrase = phrases.get(context.phraseKey); + const phraseQueryKey = phrase.queryKey.get(); + const querySource = generateQuerySource(context); + const queryKey = await calculateQueryKey(querySource); + return phraseQueryKey == undefined || phraseQueryKey !== queryKey; + }, + deleteExecutionResult: (context: Context) => { + const { phrases, phraseQueries } = context.externalDependencies; + + const phrase = phrases.get(context.phraseKey); + const phraseQueryKey = phrase.queryKey.get(); + if (phraseQueryKey != undefined) { + phraseQueries.delete(phraseQueryKey); + phrase.queryKey.set(undefined); + } + }, + execute: async (context: Context) => { + const { phrases, phraseQueries, queryCache } = context.externalDependencies; + + const querySource = generateQuerySource(context); + const queryKey = await calculateQueryKey(querySource); + + let query = queryCache.get(queryKey); + if (query != undefined) { + logger.info(`Loaded query from cache.`); + } else { + query = await generateQuery(querySource, context.externalDependencies); + const phonemes = getPhonemes(query); + logger.info(`Generated query. phonemes: ${phonemes}`); + queryCache.set(queryKey, query); + } + + const phrase = phrases.get(context.phraseKey); + const phraseQueryKey = phrase.queryKey.get(); + if (phraseQueryKey != undefined) { + phraseQueries.delete(phraseQueryKey); + } + phraseQueries.set(queryKey, query); + phrase.queryKey.set(queryKey); + }, +}; + +// 歌唱ボリューム生成ステージ + +/** + * 歌唱ボリュームの生成に必要なデータ + */ +type SingingVolumeSource = Readonly<{ + engineId: EngineId; + engineFrameRate: number; + tpqn: number; + tempos: Tempo[]; + firstRestDuration: number; + notes: Note[]; + keyRangeAdjustment: number; + volumeRangeAdjustment: number; + queryForVolumeGeneration: EditorFrameAudioQuery; +}>; + +const generateSingingVolumeSource = (context: Context): SingingVolumeSource => { + const { phrases, phraseQueries } = context.externalDependencies; + + const track = getOrThrow(context.snapshot.tracks, context.trackId); + if (track.singer == undefined) { + throw new Error("track.singer is undefined."); + } + const phrase = phrases.get(context.phraseKey); + const phraseQueryKey = phrase.queryKey.get(); + if (phraseQueryKey == undefined) { + throw new Error("phraseQueryKey is undefined."); + } + const query = phraseQueries.get(phraseQueryKey); + const clonedQuery = cloneWithUnwrapProxy(query); + applyPitchEdit( + clonedQuery, + phrase.startTime, + track.pitchEditData, + context.snapshot.editorFrameRate, + ); + return { + engineId: track.singer.engineId, + engineFrameRate: query.frameRate, + tpqn: context.snapshot.tpqn, + tempos: context.snapshot.tempos, + firstRestDuration: phrase.firstRestDuration, + notes: phrase.notes, + keyRangeAdjustment: track.keyRangeAdjustment, + volumeRangeAdjustment: track.volumeRangeAdjustment, + queryForVolumeGeneration: clonedQuery, + }; +}; + +const calculateSingingVolumeKey = async ( + singingVolumeSource: SingingVolumeSource, +) => { + const hash = await calculateHash(singingVolumeSource); + return SingingVolumeKey(hash); +}; + +const generateSingingVolume = async ( + singingVolumeSource: SingingVolumeSource, + externalDependencies: ExternalDependencies, +) => { + const notesForRequestToEngine = createNotesForRequestToEngine( + singingVolumeSource.firstRestDuration, + lastRestDurationSeconds, + singingVolumeSource.notes, + singingVolumeSource.tempos, + singingVolumeSource.tpqn, + singingVolumeSource.engineFrameRate, + ); + const queryForVolumeGeneration = singingVolumeSource.queryForVolumeGeneration; + + shiftKeyOfNotes( + notesForRequestToEngine, + -singingVolumeSource.keyRangeAdjustment, + ); + shiftPitch( + queryForVolumeGeneration.f0, + -singingVolumeSource.keyRangeAdjustment, + ); + + const singingVolume = await externalDependencies.fetchSingFrameVolume( + notesForRequestToEngine, + queryForVolumeGeneration, + singingVolumeSource.engineId, + singingTeacherStyleId, + ); + + shiftVolume(singingVolume, singingVolumeSource.volumeRangeAdjustment); + muteLastPauSection( + singingVolume, + queryForVolumeGeneration.phonemes, + singingVolumeSource.engineFrameRate, + fadeOutDurationSeconds, + ); + return singingVolume; +}; + +const singingVolumeGenerationStage: BaseStage = { + id: "singingVolumeGeneration", + shouldBeExecuted: async (context: Context) => { + const { phrases } = context.externalDependencies; + + const track = getOrThrow(context.snapshot.tracks, context.trackId); + if (track.singer == undefined) { + return false; + } + const singingVolumeSource = generateSingingVolumeSource(context); + const singingVolumeKey = + await calculateSingingVolumeKey(singingVolumeSource); + const phrase = phrases.get(context.phraseKey); + const phraseSingingVolumeKey = phrase.singingVolumeKey.get(); + return ( + phraseSingingVolumeKey == undefined || + phraseSingingVolumeKey !== singingVolumeKey + ); + }, + deleteExecutionResult: (context: Context) => { + const { phrases, phraseSingingVolumes } = context.externalDependencies; + + const phrase = phrases.get(context.phraseKey); + const phraseSingingVolumeKey = phrase.singingVolumeKey.get(); + if (phraseSingingVolumeKey != undefined) { + phraseSingingVolumes.delete(phraseSingingVolumeKey); + phrase.singingVolumeKey.set(undefined); + } + }, + execute: async (context: Context) => { + const { phrases, phraseSingingVolumes, singingVolumeCache } = + context.externalDependencies; + + const singingVolumeSource = generateSingingVolumeSource(context); + const singingVolumeKey = + await calculateSingingVolumeKey(singingVolumeSource); + + let singingVolume = singingVolumeCache.get(singingVolumeKey); + if (singingVolume != undefined) { + logger.info(`Loaded singing volume from cache.`); + } else { + singingVolume = await generateSingingVolume( + singingVolumeSource, + context.externalDependencies, + ); + logger.info(`Generated singing volume.`); + singingVolumeCache.set(singingVolumeKey, singingVolume); + } + + const phrase = phrases.get(context.phraseKey); + const phraseSingingVolumeKey = phrase.singingVolumeKey.get(); + if (phraseSingingVolumeKey != undefined) { + phraseSingingVolumes.delete(phraseSingingVolumeKey); + } + phraseSingingVolumes.set(singingVolumeKey, singingVolume); + phrase.singingVolumeKey.set(singingVolumeKey); + }, +}; + +// 歌唱音声合成ステージ + +/** + * 歌唱音声の合成に必要なデータ + */ +type SingingVoiceSource = Readonly<{ + singer: Singer; + queryForSingingVoiceSynthesis: EditorFrameAudioQuery; +}>; + +const generateSingingVoiceSource = (context: Context): SingingVoiceSource => { + const { phrases, phraseQueries, phraseSingingVolumes } = + context.externalDependencies; + + const track = getOrThrow(context.snapshot.tracks, context.trackId); + if (track.singer == undefined) { + throw new Error("track.singer is undefined."); + } + const phrase = phrases.get(context.phraseKey); + const phraseQueryKey = phrase.queryKey.get(); + const phraseSingingVolumeKey = phrase.singingVolumeKey.get(); + if (phraseQueryKey == undefined) { + throw new Error("phraseQueryKey is undefined."); + } + if (phraseSingingVolumeKey == undefined) { + throw new Error("phraseSingingVolumeKey is undefined."); + } + const query = phraseQueries.get(phraseQueryKey); + const singingVolume = phraseSingingVolumes.get(phraseSingingVolumeKey); + const clonedQuery = cloneWithUnwrapProxy(query); + const clonedSingingVolume = cloneWithUnwrapProxy(singingVolume); + applyPitchEdit( + clonedQuery, + phrase.startTime, + track.pitchEditData, + context.snapshot.editorFrameRate, + ); + clonedQuery.volume = clonedSingingVolume; + return { + singer: track.singer, + queryForSingingVoiceSynthesis: clonedQuery, + }; +}; + +const calculateSingingVoiceKey = async ( + singingVoiceSource: SingingVoiceSource, +) => { + const hash = await calculateHash(singingVoiceSource); + return SingingVoiceKey(hash); +}; + +const synthesizeSingingVoice = async ( + singingVoiceSource: SingingVoiceSource, + externalDependencies: ExternalDependencies, +) => { + const singingVoice = await externalDependencies.synthesizeSingingVoice( + singingVoiceSource.singer, + singingVoiceSource.queryForSingingVoiceSynthesis, + ); + return singingVoice; +}; + +const singingVoiceSynthesisStage: BaseStage = { + id: "singingVoiceSynthesis", + shouldBeExecuted: async (context: Context) => { + const { phrases } = context.externalDependencies; + + const track = getOrThrow(context.snapshot.tracks, context.trackId); + if (track.singer == undefined) { + return false; + } + const singingVoiceSource = generateSingingVoiceSource(context); + const singingVoiceKey = await calculateSingingVoiceKey(singingVoiceSource); + const phrase = phrases.get(context.phraseKey); + const phraseSingingVoiceKey = phrase.singingVoiceKey.get(); + return ( + phraseSingingVoiceKey == undefined || + phraseSingingVoiceKey !== singingVoiceKey + ); + }, + deleteExecutionResult: (context: Context) => { + const { phrases, phraseSingingVoices } = context.externalDependencies; + + const phrase = phrases.get(context.phraseKey); + const phraseSingingVoiceKey = phrase.singingVoiceKey.get(); + if (phraseSingingVoiceKey != undefined) { + phraseSingingVoices.delete(phraseSingingVoiceKey); + phrase.singingVoiceKey.set(undefined); + } + }, + execute: async (context: Context) => { + const { phrases, phraseSingingVoices, singingVoiceCache } = + context.externalDependencies; + + const singingVoiceSource = generateSingingVoiceSource(context); + const singingVoiceKey = await calculateSingingVoiceKey(singingVoiceSource); + + let singingVoice = singingVoiceCache.get(singingVoiceKey); + if (singingVoice != undefined) { + logger.info(`Loaded singing voice from cache.`); + } else { + singingVoice = await synthesizeSingingVoice( + singingVoiceSource, + context.externalDependencies, + ); + logger.info(`Generated singing voice.`); + singingVoiceCache.set(singingVoiceKey, singingVoice); + } + + const phrase = phrases.get(context.phraseKey); + const phraseSingingVoiceKey = phrase.singingVoiceKey.get(); + if (phraseSingingVoiceKey != undefined) { + phraseSingingVoices.delete(phraseSingingVoiceKey); + } + phraseSingingVoices.set(singingVoiceKey, singingVoice); + phrase.singingVoiceKey.set(singingVoiceKey); + }, +}; + +// フレーズレンダラー + +/** + * フレーズレンダラー。 + * 各フレーズごとに、ステージを進めながらレンダリング処理を行う。 + */ +export type PhraseRenderer = Readonly<{ + /** + * 一番最初のステージのIDを返す。 + * 一度もレンダリングを行っていないフレーズは、 + * この(一番最初の)ステージからレンダリング処理を開始する必要がある。 + * @returns ステージID + */ + getFirstRenderStageId: () => PhraseRenderStageId; + + /** + * レンダリングが必要なフレーズかどうかを判断し、 + * レンダリングが必要であればどのステージから開始されるべきかを判断して、そのステージのIDを返す。 + * レンダリングが必要ない場合、undefinedが返される。 + * @param snapshot スナップショット + * @param trackId トラックID + * @param phraseKey フレーズキー + * @returns ステージID または undefined + */ + determineStartStage: ( + snapshot: Snapshot, + trackId: TrackId, + phraseKey: PhraseKey, + ) => Promise; + + /** + * 指定されたフレーズのレンダリング処理を、指定されたステージから開始する。 + * レンダリング処理を開始する前に、前回のレンダリング処理結果の削除が行われる。 + * @param snapshot スナップショット + * @param trackId トラックID + * @param phraseKey フレーズキー + * @param startStageId 開始ステージID + */ + render: ( + snapshot: Snapshot, + trackId: TrackId, + phraseKey: PhraseKey, + startStageId: PhraseRenderStageId, + ) => Promise; +}>; + +const stages: readonly BaseStage[] = [ + queryGenerationStage, + singingVolumeGenerationStage, + singingVoiceSynthesisStage, +]; + +/** + * フレーズレンダラーを作成する。 + * @param externalDependencies レンダリング処理で必要となる外部のキャッシュや関数 + * @returns フレーズレンダラー + */ +export const createPhraseRenderer = ( + externalDependencies: ExternalDependencies, +): PhraseRenderer => { + return { + getFirstRenderStageId: () => { + return stages[0].id; + }, + determineStartStage: async ( + snapshot: Snapshot, + trackId: TrackId, + phraseKey: PhraseKey, + ) => { + const context: Context = { + snapshot, + trackId, + phraseKey, + externalDependencies, + }; + for (const stage of stages) { + if (await stage.shouldBeExecuted(context)) { + return stage.id; + } + } + return undefined; + }, + render: async ( + snapshot: Snapshot, + trackId: TrackId, + phraseKey: PhraseKey, + startStageId: PhraseRenderStageId, + ) => { + const context: Context = { + snapshot, + trackId, + phraseKey, + externalDependencies, + }; + const startStageIndex = stages.findIndex((value) => { + return value.id === startStageId; + }); + if (startStageIndex === -1) { + throw new Error("Stage not found."); + } + for (let i = stages.length - 1; i >= startStageIndex; i--) { + stages[i].deleteExecutionResult(context); + } + for (let i = startStageIndex; i < stages.length; i++) { + await stages[i].execute(context); + } + }, + }; +}; diff --git a/src/store/singing.ts b/src/store/singing.ts index 7369744dc6..9dc9aa71ea 100644 --- a/src/store/singing.ts +++ b/src/store/singing.ts @@ -14,14 +14,16 @@ import { Singer, Phrase, transformCommandStore, - SingingGuide, SingingVoice, - SingingGuideSourceHash, - SingingVoiceSourceHash, SequencerEditTarget, - PhraseSourceHash, + PhraseKey, Track, SequenceId, + SingingVolumeKey, + SingingVolume, + SingingVoiceKey, + EditorFrameAudioQueryKey, + EditorFrameAudioQuery, } from "./type"; import { DEFAULT_PROJECT_NAME, sanitizeFileName } from "./utility"; import { @@ -31,7 +33,7 @@ import { StyleId, TrackId, } from "@/type/preload"; -import { FrameAudioQuery, Note as NoteForRequestToEngine } from "@/openapi"; +import { Note as NoteForRequestToEngine } from "@/openapi"; import { ResultError, getValueOrThrow } from "@/type/result"; import { AudioEvent, @@ -57,18 +59,14 @@ import { isValidVolumeRangeAdjustment, secondToTick, tickToSecond, - calculateSingingGuideSourceHash, - calculateSingingVoiceSourceHash, - decibelToLinear, - applyPitchEdit, VALUE_INDICATING_NO_DATA, isValidPitchEditData, - calculatePhraseSourceHash, + calculatePhraseKey, isValidTempos, isValidTimeSignatures, isValidTpqn, DEFAULT_TPQN, - DEPRECATED_DEFAULT_EDIT_FRAME_RATE, + DEPRECATED_DEFAULT_EDITOR_FRAME_RATE, createDefaultTrack, createDefaultTempo, createDefaultTimeSignature, @@ -86,7 +84,6 @@ import { import { AnimationTimer, createPromiseThatResolvesWhen, - linearInterpolation, round, } from "@/sing/utility"; import { getWorkaroundKeyRangeAdjustment } from "@/sing/workaroundKeyRangeAdjustment"; @@ -98,6 +95,10 @@ import { ufProjectToVoicevox } from "@/sing/utaformatixProject/toVoicevox"; import { uuid4 } from "@/helpers/random"; import { convertToWavFileData } from "@/sing/convertToWavFileData"; import { generateWriteErrorMessage } from "@/helpers/fileHelper"; +import { + PhraseRenderStageId, + createPhraseRenderer, +} from "@/sing/phraseRendering"; const logger = createLogger("store/singing"); @@ -156,9 +157,8 @@ const offlineRenderTracks = async ( withLimiter: boolean, multiTrackEnabled: boolean, tracks: Map, - phrases: Map, - singingGuides: Map, - singingVoices: Map, + phrases: Map, + singingVoices: Map, ) => { const offlineAudioContext = new OfflineAudioContext( numberOfChannels, @@ -182,21 +182,16 @@ const offlineRenderTracks = async ( } for (const phrase of phrases.values()) { - if ( - phrase.singingGuideKey == undefined || - phrase.singingVoiceKey == undefined || - phrase.state !== "PLAYABLE" - ) { + if (phrase.singingVoiceKey == undefined || phrase.state !== "PLAYABLE") { continue; } - const singingGuide = getOrThrow(singingGuides, phrase.singingGuideKey); const singingVoice = getOrThrow(singingVoices, phrase.singingVoiceKey); // TODO: この辺りの処理を共通化する const audioEvents = await generateAudioEvents( offlineAudioContext, - singingGuide.startTime, - singingVoice.blob, + phrase.startTime, + singingVoice, ); const audioPlayer = new AudioPlayer(offlineAudioContext); const audioSequence: AudioSequence = { @@ -249,12 +244,13 @@ if (window.AudioContext) { } const playheadPosition = new FrequentlyUpdatedState(0); -const singingVoices = new Map(); +const phraseSingingVoices = new Map(); const sequences = new Map(); const animationTimer = new AnimationTimer(); -const singingGuideCache = new Map(); -const singingVoiceCache = new Map(); +const queryCache = new Map(); +const singingVolumeCache = new Map(); +const singingVoiceCache = new Map(); const initialTrackId = TrackId(crypto.randomUUID()); @@ -417,9 +413,10 @@ export const singingStoreState: SingingStoreState = { */ _selectedTrackId: initialTrackId, - editFrameRate: DEPRECATED_DEFAULT_EDIT_FRAME_RATE, + editorFrameRate: DEPRECATED_DEFAULT_EDITOR_FRAME_RATE, phrases: new Map(), - singingGuides: new Map(), + phraseQueries: new Map(), + phraseSingingVolumes: new Map(), sequencerZoomX: 0.5, sequencerZoomY: 0.75, sequencerSnapType: 16, @@ -876,20 +873,37 @@ export const singingStore = createPartialStore({ }, }, - SET_SINGING_GUIDE_KEY_TO_PHRASE: { + SET_QUERY_KEY_TO_PHRASE: { + mutation( + state, + { + phraseKey, + queryKey, + }: { + phraseKey: PhraseKey; + queryKey: EditorFrameAudioQueryKey | undefined; + }, + ) { + const phrase = getOrThrow(state.phrases, phraseKey); + + phrase.queryKey = queryKey; + }, + }, + + SET_SINGING_VOLUME_KEY_TO_PHRASE: { mutation( state, { phraseKey, - singingGuideKey, + singingVolumeKey, }: { - phraseKey: PhraseSourceHash; - singingGuideKey: SingingGuideSourceHash | undefined; + phraseKey: PhraseKey; + singingVolumeKey: SingingVolumeKey | undefined; }, ) { const phrase = getOrThrow(state.phrases, phraseKey); - phrase.singingGuideKey = singingGuideKey; + phrase.singingVolumeKey = singingVolumeKey; }, }, @@ -900,8 +914,8 @@ export const singingStore = createPartialStore({ phraseKey, singingVoiceKey, }: { - phraseKey: PhraseSourceHash; - singingVoiceKey: SingingVoiceSourceHash | undefined; + phraseKey: PhraseKey; + singingVoiceKey: SingingVoiceKey | undefined; }, ) { const phrase = getOrThrow(state.phrases, phraseKey); @@ -917,7 +931,7 @@ export const singingStore = createPartialStore({ phraseKey, sequenceId, }: { - phraseKey: PhraseSourceHash; + phraseKey: PhraseKey; sequenceId: SequenceId | undefined; }, ) { @@ -927,27 +941,45 @@ export const singingStore = createPartialStore({ }, }, - SET_SINGING_GUIDE: { + SET_PHRASE_QUERY: { mutation( state, { - singingGuideKey, - singingGuide, + queryKey, + query, }: { - singingGuideKey: SingingGuideSourceHash; - singingGuide: SingingGuide; + queryKey: EditorFrameAudioQueryKey; + query: EditorFrameAudioQuery; }, ) { - state.singingGuides.set(singingGuideKey, singingGuide); + state.phraseQueries.set(queryKey, query); }, }, - DELETE_SINGING_GUIDE: { + DELETE_PHRASE_QUERY: { + mutation(state, { queryKey }: { queryKey: EditorFrameAudioQueryKey }) { + state.phraseQueries.delete(queryKey); + }, + }, + + SET_PHRASE_SINGING_VOLUME: { + mutation( + state, + { + singingVolumeKey, + singingVolume, + }: { singingVolumeKey: SingingVolumeKey; singingVolume: SingingVolume }, + ) { + state.phraseSingingVolumes.set(singingVolumeKey, singingVolume); + }, + }, + + DELETE_PHRASE_SINGING_VOLUME: { mutation( state, - { singingGuideKey }: { singingGuideKey: SingingGuideSourceHash }, + { singingVolumeKey }: { singingVolumeKey: SingingVolumeKey }, ) { - state.singingGuides.delete(singingGuideKey); + state.phraseSingingVolumes.delete(singingVolumeKey); }, }, @@ -1333,6 +1365,19 @@ export const singingStore = createPartialStore({ return phraseFirstRestDuration; }; + const calculatePhraseStartTime = ( + phraseFirstRestDuration: number, + phraseNotes: Note[], + tempos: Tempo[], + tpqn: number, + ) => { + return tickToSecond( + phraseNotes[0].position - phraseFirstRestDuration, + tempos, + tpqn, + ); + }; + const searchPhrases = async ( notes: Note[], tempos: Tempo[], @@ -1340,7 +1385,7 @@ export const singingStore = createPartialStore({ phraseFirstRestMinDurationSeconds: number, trackId: TrackId, ) => { - const foundPhrases = new Map(); + const foundPhrases = new Map(); let phraseNotes: Note[] = []; let prevPhraseLastNote: Note | undefined = undefined; @@ -1365,14 +1410,22 @@ export const singingStore = createPartialStore({ tempos, tpqn, ); - const notesHash = await calculatePhraseSourceHash({ + const phraseStartTime = calculatePhraseStartTime( + phraseFirstRestDuration, + phraseNotes, + tempos, + tpqn, + ); + const phraseKey = await calculatePhraseKey({ firstRestDuration: phraseFirstRestDuration, notes: phraseNotes, + startTime: phraseStartTime, trackId, }); - foundPhrases.set(notesHash, { + foundPhrases.set(phraseKey, { firstRestDuration: phraseFirstRestDuration, notes: phraseNotes, + startTime: phraseStartTime, state: "WAITING_TO_BE_RENDERED", trackId, }); @@ -1386,93 +1439,11 @@ export const singingStore = createPartialStore({ return foundPhrases; }; - // リクエスト用のノーツ(と休符)を作成する - const createNotesForRequestToEngine = ( - firstRestDuration: number, - lastRestDurationSeconds: number, - notes: Note[], - tempos: Tempo[], - tpqn: number, - frameRate: number, - ) => { - const notesForRequestToEngine: NoteForRequestToEngine[] = []; - - // 先頭の休符を変換 - const firstRestStartSeconds = tickToSecond( - notes[0].position - firstRestDuration, - tempos, - tpqn, - ); - const firstRestStartFrame = Math.round( - firstRestStartSeconds * frameRate, - ); - const firstRestEndSeconds = tickToSecond( - notes[0].position, - tempos, - tpqn, - ); - const firstRestEndFrame = Math.round(firstRestEndSeconds * frameRate); - notesForRequestToEngine.push({ - key: undefined, - frameLength: firstRestEndFrame - firstRestStartFrame, - lyric: "", - }); - - // ノートを変換 - for (const note of notes) { - const noteOnSeconds = tickToSecond(note.position, tempos, tpqn); - const noteOnFrame = Math.round(noteOnSeconds * frameRate); - const noteOffSeconds = tickToSecond( - note.position + note.duration, - tempos, - tpqn, - ); - const noteOffFrame = Math.round(noteOffSeconds * frameRate); - notesForRequestToEngine.push({ - key: note.noteNumber, - frameLength: noteOffFrame - noteOnFrame, - lyric: note.lyric, - }); - } - - // 末尾に休符を追加 - const lastRestFrameLength = Math.round( - lastRestDurationSeconds * frameRate, - ); - notesForRequestToEngine.push({ - key: undefined, - frameLength: lastRestFrameLength, - lyric: "", - }); - - // frameLengthが1以上になるようにする - for (let i = 0; i < notesForRequestToEngine.length; i++) { - const frameLength = notesForRequestToEngine[i].frameLength; - const frameToShift = Math.max(0, 1 - frameLength); - notesForRequestToEngine[i].frameLength += frameToShift; - if (i < notesForRequestToEngine.length - 1) { - notesForRequestToEngine[i + 1].frameLength -= frameToShift; - } - } - - return notesForRequestToEngine; - }; - - const shiftKeyOfNotes = ( - notes: NoteForRequestToEngine[], - keyShift: number, - ) => { - for (const note of notes) { - if (note.key != undefined) { - note.key += keyShift; - } - } - }; - const singingTeacherStyleId = StyleId(6000); // TODO: 設定できるようにする const fetchQuery = async ( engineId: EngineId, + engineFrameRate: number, notesForRequestToEngine: NoteForRequestToEngine[], ) => { try { @@ -1482,12 +1453,17 @@ export const singingStore = createPartialStore({ const instance = await actions.INSTANTIATE_ENGINE_CONNECTOR({ engineId, }); - return await instance.invoke( + const query = await instance.invoke( "singFrameAudioQuerySingFrameAudioQueryPost", )({ score: { notes: notesForRequestToEngine }, speaker: singingTeacherStyleId, }); + const editorQuery: EditorFrameAudioQuery = { + ...query, + frameRate: engineFrameRate, + }; + return editorQuery; } catch (error) { const lyrics = notesForRequestToEngine .map((value) => value.lyric) @@ -1500,78 +1476,10 @@ export const singingStore = createPartialStore({ } }; - const getPhonemes = (frameAudioQuery: FrameAudioQuery) => { - return frameAudioQuery.phonemes.map((value) => value.phoneme).join(" "); - }; - - const shiftGuidePitch = ( - frameAudioQuery: FrameAudioQuery, - pitchShift: number, - ) => { - frameAudioQuery.f0 = frameAudioQuery.f0.map((value) => { - return value * Math.pow(2, pitchShift / 12); - }); - }; - - const shiftGuideVolume = ( - frameAudioQuery: FrameAudioQuery, - volumeShift: number, - ) => { - frameAudioQuery.volume = frameAudioQuery.volume.map((value) => { - return value * decibelToLinear(volumeShift); - }); - }; - - // 歌とpauの呼吸音が重ならないようにvolumeを制御する - // fadeOutDurationSecondsが0の場合は即座にvolumeを0にする - const muteLastPauSection = ( - frameAudioQuery: FrameAudioQuery, - frameRate: number, - fadeOutDurationSeconds: number, - ) => { - const lastPhoneme = frameAudioQuery.phonemes.at(-1); - if (lastPhoneme == undefined || lastPhoneme.phoneme !== "pau") { - throw new Error("No pau exists at the end."); - } - - let lastPauStartFrame = 0; - for (let i = 0; i < frameAudioQuery.phonemes.length - 1; i++) { - lastPauStartFrame += frameAudioQuery.phonemes[i].frameLength; - } - - const lastPauFrameLength = lastPhoneme.frameLength; - let fadeOutFrameLength = Math.round(fadeOutDurationSeconds * frameRate); - fadeOutFrameLength = Math.max(0, fadeOutFrameLength); - fadeOutFrameLength = Math.min(lastPauFrameLength, fadeOutFrameLength); - - // フェードアウト処理を行う - if (fadeOutFrameLength === 1) { - frameAudioQuery.volume[lastPauStartFrame] *= 0.5; - } else { - for (let i = 0; i < fadeOutFrameLength; i++) { - frameAudioQuery.volume[lastPauStartFrame + i] *= - linearInterpolation(0, 1, fadeOutFrameLength - 1, 0, i); - } - } - // 音量を0にする - for (let i = fadeOutFrameLength; i < lastPauFrameLength; i++) { - frameAudioQuery.volume[lastPauStartFrame + i] = 0; - } - }; - - const calculateStartTime = ( - phrase: Phrase, - tempos: Tempo[], - tpqn: number, + const synthesizeSingingVoice = async ( + singer: Singer, + query: EditorFrameAudioQuery, ) => { - return tickToSecond( - phrase.notes[0].position - phrase.firstRestDuration, - tempos, - tpqn, - ); - }; - - const synthesize = async (singer: Singer, query: FrameAudioQuery) => { if (!getters.IS_ENGINE_READY(singer.engineId)) { throw new Error("Engine not ready."); } @@ -1605,63 +1513,141 @@ export const singingStore = createPartialStore({ * @param phraseKey フレーズのキー * @returns シーケンスID */ - const getPhraseSequenceId = (phraseKey: PhraseSourceHash) => { + const getPhraseSequenceId = (phraseKey: PhraseKey) => { return getOrThrow(state.phrases, phraseKey).sequenceId; }; + /** + * フレーズが持つ歌声のキーを取得する。 + * @param phraseKey フレーズのキー + * @returns 歌声のキー + */ + const getPhraseSingingVoiceKey = (phraseKey: PhraseKey) => { + return getOrThrow(state.phrases, phraseKey).singingVoiceKey; + }; + const render = async () => { if (!audioContext) { throw new Error("audioContext is undefined."); } const audioContextRef = audioContext; - // レンダリング中に変更される可能性のあるデータをコピーする - const tracks = cloneWithUnwrapProxy(state.tracks); - - const overlappingNoteIdsMap = new Map( - [...tracks.keys()].map((trackId) => [ - trackId, - getters.OVERLAPPING_NOTE_IDS(trackId), - ]), - ); + const firstRestMinDurationSeconds = 0.12; - const singerAndFrameRates = new Map( - [...tracks].map(([trackId, track]) => [ - trackId, - track.singer - ? { - singer: track.singer, - frameRate: - state.engineManifests[track.singer.engineId].frameRate, - } - : undefined, - ]), - ); - const tpqn = state.tpqn; - const tempos = state.tempos.map((value) => ({ ...value })); - const editFrameRate = state.editFrameRate; + // レンダリング中に変更される可能性のあるデータのコピー + const snapshot = { + tpqn: state.tpqn, + tempos: cloneWithUnwrapProxy(state.tempos), + tracks: cloneWithUnwrapProxy(state.tracks), + trackOverlappingNoteIds: new Map( + [...state.tracks.keys()].map((trackId) => [ + trackId, + getters.OVERLAPPING_NOTE_IDS(trackId), + ]), + ), + engineFrameRates: new Map( + Object.entries(state.engineManifests).map( + ([engineId, engineManifest]) => [ + engineId as EngineId, + engineManifest.frameRate, + ], + ), + ), + editorFrameRate: state.editorFrameRate, + } as const; + + const phraseRenderer = createPhraseRenderer({ + queryCache, + singingVolumeCache, + singingVoiceCache, + phrases: { + get: (phraseKey: PhraseKey) => { + const phrase = getOrThrow(state.phrases, phraseKey); + return { + firstRestDuration: phrase.firstRestDuration, + notes: phrase.notes, + startTime: phrase.startTime, + queryKey: { + get: () => getOrThrow(state.phrases, phraseKey).queryKey, + set: (value) => + mutations.SET_QUERY_KEY_TO_PHRASE({ + phraseKey, + queryKey: value, + }), + }, + singingVolumeKey: { + get: () => + getOrThrow(state.phrases, phraseKey).singingVolumeKey, + set: (value) => + mutations.SET_SINGING_VOLUME_KEY_TO_PHRASE({ + phraseKey, + singingVolumeKey: value, + }), + }, + singingVoiceKey: { + get: () => + getOrThrow(state.phrases, phraseKey).singingVoiceKey, + set: (value) => + mutations.SET_SINGING_VOICE_KEY_TO_PHRASE({ + phraseKey, + singingVoiceKey: value, + }), + }, + }; + }, + }, + phraseQueries: { + get: (queryKey) => getOrThrow(state.phraseQueries, queryKey), + set: (queryKey, query) => + mutations.SET_PHRASE_QUERY({ queryKey, query }), + delete: (queryKey) => mutations.DELETE_PHRASE_QUERY({ queryKey }), + }, + phraseSingingVolumes: { + get: (singingVolumeKey) => + getOrThrow(state.phraseSingingVolumes, singingVolumeKey), + set: (singingVolumeKey, singingVolume) => + mutations.SET_PHRASE_SINGING_VOLUME({ + singingVolumeKey, + singingVolume, + }), + delete: (singingVolumeKey) => + mutations.DELETE_PHRASE_SINGING_VOLUME({ singingVolumeKey }), + }, + phraseSingingVoices: { + set: (singingVoiceKey, singingVoice) => + phraseSingingVoices.set(singingVoiceKey, singingVoice), + delete: (singingVoiceKey) => + phraseSingingVoices.delete(singingVoiceKey), + }, + fetchQuery, + fetchSingFrameVolume: (notes, query, engineId, styleId) => + actions.FETCH_SING_FRAME_VOLUME({ + notes, + query, + engineId, + styleId, + }), + synthesizeSingingVoice, + }); - const firstRestMinDurationSeconds = 0.12; - const lastRestDurationSeconds = 0.5; - const fadeOutDurationSeconds = 0.15; + const renderStartStageIds = new Map(); // フレーズを更新する - const foundPhrases = new Map(); - for (const [trackId, track] of tracks) { - if (!track.singer) { - continue; - } - + const foundPhrases = new Map(); + for (const [trackId, track] of snapshot.tracks) { // 重なっているノートを削除する - const overlappingNoteIds = getOrThrow(overlappingNoteIdsMap, trackId); + const overlappingNoteIds = getOrThrow( + snapshot.trackOverlappingNoteIds, + trackId, + ); const notes = track.notes.filter( (value) => !overlappingNoteIds.has(value.id), ); const phrases = await searchPhrases( notes, - tempos, - tpqn, + snapshot.tempos, + snapshot.tpqn, firstRestMinDurationSeconds, trackId, ); @@ -1670,8 +1656,8 @@ export const singingStore = createPartialStore({ } } - const phrases = new Map(); - const disappearedPhraseKeys = new Set(); + const phrases = new Map(); + const disappearedPhraseKeys = new Set(); for (const phraseKey of state.phrases.keys()) { if (!foundPhrases.has(phraseKey)) { @@ -1680,161 +1666,87 @@ export const singingStore = createPartialStore({ } } for (const [phraseKey, foundPhrase] of foundPhrases) { + // 新しいフレーズまたは既存のフレーズの場合 const existingPhrase = state.phrases.get(phraseKey); - if (!existingPhrase) { - // 新しいフレーズの場合 - phrases.set(phraseKey, foundPhrase); - continue; - } - - const track = getOrThrow(tracks, existingPhrase.trackId); - - const singerAndFrameRate = getOrThrow( - singerAndFrameRates, - existingPhrase.trackId, - ); - - // すでに存在するフレーズの場合 - // 再レンダリングする必要があるかどうかをチェックする - // シンガーが未設定の場合、とりあえず常に再レンダリングする - // 音声合成を行う必要がある場合、singingVoiceKeyをundefinedにする - // 歌い方の推論も行う必要がある場合、singingGuideKeyとsingingVoiceKeyをundefinedにする - // TODO: リファクタリングする - const phrase = { ...existingPhrase }; - if (!singerAndFrameRate || phrase.state === "COULD_NOT_RENDER") { - if (phrase.singingGuideKey != undefined) { - phrase.singingGuideKey = undefined; - } - if (phrase.singingVoiceKey != undefined) { - phrase.singingVoiceKey = undefined; - } - } else if (phrase.singingGuideKey != undefined) { - const calculatedHash = await calculateSingingGuideSourceHash({ - engineId: singerAndFrameRate.singer.engineId, - tpqn, - tempos, - firstRestDuration: phrase.firstRestDuration, - lastRestDurationSeconds, - notes: phrase.notes, - keyRangeAdjustment: track.keyRangeAdjustment, - volumeRangeAdjustment: track.volumeRangeAdjustment, - frameRate: singerAndFrameRate.frameRate, - }); - const hash = phrase.singingGuideKey; - if (hash !== calculatedHash) { - phrase.singingGuideKey = undefined; - if (phrase.singingVoiceKey != undefined) { - phrase.singingVoiceKey = undefined; - } - } else if (phrase.singingVoiceKey != undefined) { - let singingGuide = getOrThrow( - state.singingGuides, - phrase.singingGuideKey, - ); - - // 歌い方をコピーして、ピッチ編集を適用する - singingGuide = structuredClone(toRaw(singingGuide)); - applyPitchEdit(singingGuide, track.pitchEditData, editFrameRate); - - const calculatedHash = await calculateSingingVoiceSourceHash({ - singer: singerAndFrameRate.singer, - frameAudioQuery: singingGuide.query, - }); - const hash = phrase.singingVoiceKey; - if (hash !== calculatedHash) { - phrase.singingVoiceKey = undefined; - } + const phrase = + existingPhrase == undefined + ? foundPhrase + : cloneWithUnwrapProxy(existingPhrase); + const track = getOrThrow(snapshot.tracks, phrase.trackId); + if (track.singer == undefined) { + phrase.state = "SINGER_IS_NOT_SET"; + } else { + // 新しいフレーズの場合は最初からレンダリングする + // phrase.stateがCOULD_NOT_RENDERだった場合は最初からレンダリングし直す + // 既存のフレーズの場合は適切なレンダリング開始ステージを決定する + const renderStartStageId = + existingPhrase == undefined || phrase.state === "COULD_NOT_RENDER" + ? phraseRenderer.getFirstRenderStageId() + : await phraseRenderer.determineStartStage( + snapshot, + foundPhrase.trackId, + phraseKey, + ); + if (renderStartStageId != undefined) { + renderStartStageIds.set(phraseKey, renderStartStageId); + phrase.state = "WAITING_TO_BE_RENDERED"; } } - phrases.set(phraseKey, phrase); } - // フレーズのstateを更新する - for (const phrase of phrases.values()) { - if ( - phrase.singingGuideKey == undefined || - phrase.singingVoiceKey == undefined - ) { - phrase.state = "WAITING_TO_BE_RENDERED"; - } - } - // 無くなったフレーズのシーケンスを削除する for (const phraseKey of disappearedPhraseKeys) { const phraseSequenceId = getPhraseSequenceId(phraseKey); if (phraseSequenceId != undefined) { deleteSequence(phraseSequenceId); - mutations.SET_SEQUENCE_ID_TO_PHRASE({ - phraseKey, - sequenceId: undefined, - }); } } - // 使われていない歌い方と歌声を削除する - const singingGuideKeysInUse = new Set( - [...phrases.values()] - .map((value) => value.singingGuideKey) - .filter((value) => value != undefined), - ); - const singingVoiceKeysInUse = new Set( - [...phrases.values()] - .map((value) => value.singingVoiceKey) - .filter((value) => value != undefined), - ); - const existingSingingGuideKeys = new Set(state.singingGuides.keys()); - const existingSingingVoiceKeys = new Set(singingVoices.keys()); - const singingGuideKeysToDelete = existingSingingGuideKeys.difference( - singingGuideKeysInUse, - ); - const singingVoiceKeysToDelete = existingSingingVoiceKeys.difference( - singingVoiceKeysInUse, - ); - for (const singingGuideKey of singingGuideKeysToDelete) { - mutations.DELETE_SINGING_GUIDE({ singingGuideKey }); - } - for (const singingVoiceKey of singingVoiceKeysToDelete) { - singingVoices.delete(singingVoiceKey); - } - mutations.SET_PHRASES({ phrases }); logger.info("Phrases updated."); // 各フレーズのレンダリングを行う + for (const [phraseKey, phrase] of state.phrases.entries()) { + if ( + phrase.state === "SINGER_IS_NOT_SET" || + phrase.state === "WAITING_TO_BE_RENDERED" + ) { + // シーケンスが存在する場合は、シーケンスを削除する + // TODO: ピッチを編集したときは行わないようにする + const phraseSequenceId = getPhraseSequenceId(phraseKey); + if (phraseSequenceId != undefined) { + deleteSequence(phraseSequenceId); + mutations.SET_SEQUENCE_ID_TO_PHRASE({ + phraseKey, + sequenceId: undefined, + }); + } + + // ノートシーケンスを作成して登録し、プレビュー音が鳴るようにする + const noteEvents = generateNoteEvents( + phrase.notes, + snapshot.tempos, + snapshot.tpqn, + ); + const polySynth = new PolySynth(audioContextRef); + const sequenceId = SequenceId(uuid4()); + registerSequence(sequenceId, { + type: "note", + instrument: polySynth, + noteEvents, + trackId: phrase.trackId, + }); + mutations.SET_SEQUENCE_ID_TO_PHRASE({ phraseKey, sequenceId }); + } + } const phrasesToBeRendered = new Map( [...state.phrases.entries()].filter(([, phrase]) => { return phrase.state === "WAITING_TO_BE_RENDERED"; }), ); - for (const [phraseKey, phrase] of phrasesToBeRendered) { - // シーケンスが存在する場合は、シーケンスを削除する - // TODO: ピッチを編集したときは行わないようにする - - const phraseSequenceId = getPhraseSequenceId(phraseKey); - if (phraseSequenceId != undefined) { - deleteSequence(phraseSequenceId); - mutations.SET_SEQUENCE_ID_TO_PHRASE({ - phraseKey, - sequenceId: undefined, - }); - } - - // ノートシーケンスを作成して登録し、プレビュー音が鳴るようにする - - const noteEvents = generateNoteEvents(phrase.notes, tempos, tpqn); - const polySynth = new PolySynth(audioContextRef); - const sequenceId = SequenceId(uuid4()); - registerSequence(sequenceId, { - type: "note", - instrument: polySynth, - noteEvents, - trackId: phrase.trackId, - }); - mutations.SET_SEQUENCE_ID_TO_PHRASE({ phraseKey, sequenceId }); - } while (phrasesToBeRendered.size > 0) { if (startRenderingRequested() || stopRenderingRequested()) { return; @@ -1845,175 +1757,21 @@ export const singingStore = createPartialStore({ ); phrasesToBeRendered.delete(phraseKey); - const track = getOrThrow(tracks, phrase.trackId); - - const singerAndFrameRate = getOrThrow( - singerAndFrameRates, - phrase.trackId, - ); - - // シンガーが未設定の場合は、歌い方の生成や音声合成は行わない - - if (!singerAndFrameRate) { - mutations.SET_STATE_TO_PHRASE({ - phraseKey, - phraseState: "PLAYABLE", - }); - continue; - } - mutations.SET_STATE_TO_PHRASE({ phraseKey, phraseState: "NOW_RENDERING", }); try { - // リクエスト(クエリ生成と音量生成)用のノーツを作る - const notesForRequestToEngine = createNotesForRequestToEngine( - phrase.firstRestDuration, - lastRestDurationSeconds, - phrase.notes, - tempos, - tpqn, - singerAndFrameRate.frameRate, - ); - - // リクエスト用のノーツのキーのシフトを行う - shiftKeyOfNotes(notesForRequestToEngine, -track.keyRangeAdjustment); - - // 歌い方が存在する場合、歌い方を取得する - // 歌い方が存在しない場合、キャッシュがあれば取得し、なければ歌い方を生成する - - let singingGuide: SingingGuide | undefined; - if (phrase.singingGuideKey != undefined) { - singingGuide = getOrThrow( - state.singingGuides, - phrase.singingGuideKey, - ); - } else { - const singingGuideSourceHash = - await calculateSingingGuideSourceHash({ - engineId: singerAndFrameRate.singer.engineId, - tpqn, - tempos, - firstRestDuration: phrase.firstRestDuration, - lastRestDurationSeconds, - notes: phrase.notes, - keyRangeAdjustment: track.keyRangeAdjustment, - volumeRangeAdjustment: track.volumeRangeAdjustment, - frameRate: singerAndFrameRate.frameRate, - }); - - const singingGuideKey = singingGuideSourceHash; - const cachedSingingGuide = singingGuideCache.get(singingGuideKey); - if (cachedSingingGuide) { - singingGuide = cachedSingingGuide; - - logger.info(`Loaded singing guide from cache.`); - } else { - // クエリを生成する - const query = await fetchQuery( - singerAndFrameRate.singer.engineId, - notesForRequestToEngine, - ); - - const phonemes = getPhonemes(query); - logger.info(`Fetched frame audio query. phonemes: ${phonemes}`); - - // ピッチのシフトを行う - shiftGuidePitch(query, track.keyRangeAdjustment); - - // フレーズの開始時刻を計算する - const startTime = calculateStartTime(phrase, tempos, tpqn); - - singingGuide = { - query, - frameRate: singerAndFrameRate.frameRate, - startTime, - }; - - singingGuideCache.set(singingGuideKey, singingGuide); - } - mutations.SET_SINGING_GUIDE({ singingGuideKey, singingGuide }); - mutations.SET_SINGING_GUIDE_KEY_TO_PHRASE({ - phraseKey, - singingGuideKey, - }); - } - - // ピッチ編集を適用する前に、歌い方をコピーする - singingGuide = structuredClone(toRaw(singingGuide)); - - // ピッチ編集を適用する - applyPitchEdit(singingGuide, track.pitchEditData, editFrameRate); - - // 歌声のキャッシュがあれば取得し、なければ音声合成を行う - - let singingVoice: SingingVoice | undefined; - - const singingVoiceSourceHash = - await calculateSingingVoiceSourceHash({ - singer: singerAndFrameRate.singer, - frameAudioQuery: singingGuide.query, - }); - - const singingVoiceKey = singingVoiceSourceHash; - const cachedSingingVoice = singingVoiceCache.get(singingVoiceKey); - if (cachedSingingVoice) { - singingVoice = cachedSingingVoice; - - logger.info(`Loaded singing voice from cache.`); - } else { - // 音量生成用のクエリを作る - // ピッチ編集を適用したクエリをコピーし、 - // f0をもう一度シフトして、元の(クエリ生成時の)高さに戻す - const queryForVolumeGeneration = structuredClone( - singingGuide.query, - ); - shiftGuidePitch( - queryForVolumeGeneration, - -track.keyRangeAdjustment, - ); - - // 音量を生成して、生成した音量を歌い方のクエリにセットする - // 音量値はAPIを叩く毎に変わるので、calc hashしたあとに音量を取得している - const volumes = await actions.FETCH_SING_FRAME_VOLUME({ - notes: notesForRequestToEngine, - frameAudioQuery: queryForVolumeGeneration, - styleId: singingTeacherStyleId, - engineId: singerAndFrameRate.singer.engineId, - }); - singingGuide.query.volume = volumes; - - // 音量のシフトを行う - shiftGuideVolume(singingGuide.query, track.volumeRangeAdjustment); - - // 末尾のpauの区間の音量を0にする - muteLastPauSection( - singingGuide.query, - singerAndFrameRate.frameRate, - fadeOutDurationSeconds, - ); - - // 音声合成を行う - const blob = await synthesize( - singerAndFrameRate.singer, - singingGuide.query, - ); - - logger.info(`Synthesized.`); - - singingVoice = { blob }; - singingVoiceCache.set(singingVoiceKey, singingVoice); - } - singingVoices.set(singingVoiceKey, singingVoice); - mutations.SET_SINGING_VOICE_KEY_TO_PHRASE({ + // フレーズのレンダリングを行う + await phraseRenderer.render( + snapshot, + phrase.trackId, phraseKey, - singingVoiceKey, - }); + getOrThrow(renderStartStageIds, phraseKey), + ); // シーケンスが存在する場合、シーケンスを削除する - const phraseSequenceId = getPhraseSequenceId(phraseKey); if (phraseSequenceId != undefined) { deleteSequence(phraseSequenceId); @@ -2024,11 +1782,18 @@ export const singingStore = createPartialStore({ } // オーディオシーケンスを作成して登録する - + const singingVoiceKey = getPhraseSingingVoiceKey(phraseKey); + if (singingVoiceKey == undefined) { + throw new Error("singingVoiceKey is undefined."); + } + const singingVoice = getOrThrow( + phraseSingingVoices, + singingVoiceKey, + ); const audioEvents = await generateAudioEvents( audioContextRef, - singingGuide.startTime, - singingVoice.blob, + phrase.startTime, + singingVoice, ); const audioPlayer = new AudioPlayer(audioContext); const sequenceId = SequenceId(uuid4()); @@ -2109,12 +1874,12 @@ export const singingStore = createPartialStore({ { actions }, { notes, - frameAudioQuery, + query, engineId, styleId, }: { notes: NoteForRequestToEngine[]; - frameAudioQuery: FrameAudioQuery; + query: EditorFrameAudioQuery; engineId: EngineId; styleId: StyleId; }, @@ -2127,7 +1892,7 @@ export const singingStore = createPartialStore({ score: { notes, }, - frameAudioQuery, + frameAudioQuery: query, }, speaker: styleId, }); @@ -2205,8 +1970,7 @@ export const singingStore = createPartialStore({ state.experimentalSetting.enableMultiTrack, state.tracks, state.phrases, - state.singingGuides, - singingVoiceCache, + phraseSingingVoices, ); const waveFileData = convertToWavFileData(audioBuffer); diff --git a/src/store/type.ts b/src/store/type.ts index f3b54213f3..70d080a5e5 100644 --- a/src/store/type.ts +++ b/src/store/type.ts @@ -738,65 +738,48 @@ export type Singer = z.infer; export type Track = z.infer; export type PhraseState = + | "SINGER_IS_NOT_SET" | "WAITING_TO_BE_RENDERED" | "NOW_RENDERING" | "COULD_NOT_RENDER" | "PLAYABLE"; /** - * 歌い方 + * エディタ用のFrameAudioQuery */ -export type SingingGuide = { - query: FrameAudioQuery; - frameRate: number; - startTime: number; -}; +export type EditorFrameAudioQuery = FrameAudioQuery & { frameRate: number }; /** - * 歌い方のソース(歌い方を生成するために必要なデータ) + * 歌唱ボリューム */ -export type SingingGuideSource = { - engineId: EngineId; - tpqn: number; - tempos: Tempo[]; - firstRestDuration: number; - lastRestDurationSeconds: number; - notes: Note[]; - keyRangeAdjustment: number; - volumeRangeAdjustment: number; - frameRate: number; -}; +export type SingingVolume = number[]; /** * 歌声 */ -export type SingingVoice = { - blob: Blob; -}; +export type SingingVoice = Blob; -/** - * 歌声のソース(歌声を合成するために必要なデータ) - */ -export type SingingVoiceSource = { - singer: Singer; - frameAudioQuery: FrameAudioQuery; -}; - -export const singingGuideSourceHashSchema = z +const editorFrameAudioQueryKeySchema = z .string() - .brand<"SingingGuideSourceHash">(); -export type SingingGuideSourceHash = z.infer< - typeof singingGuideSourceHashSchema + .brand<"EditorFrameAudioQueryKey">(); +export type EditorFrameAudioQueryKey = z.infer< + typeof editorFrameAudioQueryKeySchema >; +export const EditorFrameAudioQueryKey = ( + id: string, +): EditorFrameAudioQueryKey => editorFrameAudioQueryKeySchema.parse(id); -export const singingVoiceSourceHashSchema = z - .string() - .brand<"SingingVoiceSourceHash">(); -export type SingingVoiceSourceHash = z.infer< - typeof singingVoiceSourceHashSchema ->; +const singingVolumeKeySchema = z.string().brand<"SingingVolumeKey">(); +export type SingingVolumeKey = z.infer; +export const SingingVolumeKey = (id: string): SingingVolumeKey => + singingVolumeKeySchema.parse(id); + +const singingVoiceKeySchema = z.string().brand<"SingingVoiceKey">(); +export type SingingVoiceKey = z.infer; +export const SingingVoiceKey = (id: string): SingingVoiceKey => + singingVoiceKeySchema.parse(id); -export const sequenceIdSchema = z.string().brand<"SequenceId">(); +const sequenceIdSchema = z.string().brand<"SequenceId">(); export type SequenceId = z.infer; export const SequenceId = (id: string): SequenceId => sequenceIdSchema.parse(id); @@ -806,25 +789,29 @@ export const SequenceId = (id: string): SequenceId => */ export type Phrase = { firstRestDuration: number; - trackId: TrackId; notes: Note[]; + startTime: number; state: PhraseState; - singingGuideKey?: SingingGuideSourceHash; - singingVoiceKey?: SingingVoiceSourceHash; + queryKey?: EditorFrameAudioQueryKey; + singingVolumeKey?: SingingVolumeKey; + singingVoiceKey?: SingingVoiceKey; sequenceId?: SequenceId; + trackId: TrackId; // NOTE: state.tracksと同期していないので使用する際は注意 }; /** - * フレーズのソース + * フレーズの生成に必要なデータ */ export type PhraseSource = { firstRestDuration: number; - trackId: TrackId; notes: Note[]; + startTime: number; + trackId: TrackId; }; -export const phraseSourceHashSchema = z.string().brand<"PhraseSourceHash">(); -export type PhraseSourceHash = z.infer; +const phraseKeySchema = z.string().brand<"PhraseKey">(); +export type PhraseKey = z.infer; +export const PhraseKey = (id: string): PhraseKey => phraseKeySchema.parse(id); export type SequencerEditTarget = "NOTE" | "PITCH"; @@ -835,9 +822,10 @@ export type SingingStoreState = { tracks: Map; trackOrder: TrackId[]; _selectedTrackId: TrackId; - editFrameRate: number; - phrases: Map; - singingGuides: Map; + editorFrameRate: number; + phrases: Map; + phraseQueries: Map; + phraseSingingVolumes: Map; sequencerZoomX: number; sequencerZoomY: number; sequencerSnapType: number; @@ -980,46 +968,64 @@ export type SingingStoreTypes = { }; SET_PHRASES: { - mutation: { phrases: Map }; + mutation: { phrases: Map }; }; SET_STATE_TO_PHRASE: { mutation: { - phraseKey: PhraseSourceHash; + phraseKey: PhraseKey; phraseState: PhraseState; }; }; - SET_SINGING_GUIDE_KEY_TO_PHRASE: { + SET_QUERY_KEY_TO_PHRASE: { mutation: { - phraseKey: PhraseSourceHash; - singingGuideKey: SingingGuideSourceHash | undefined; + phraseKey: PhraseKey; + queryKey: EditorFrameAudioQueryKey | undefined; + }; + }; + + SET_SINGING_VOLUME_KEY_TO_PHRASE: { + mutation: { + phraseKey: PhraseKey; + singingVolumeKey: SingingVolumeKey | undefined; }; }; SET_SINGING_VOICE_KEY_TO_PHRASE: { mutation: { - phraseKey: PhraseSourceHash; - singingVoiceKey: SingingVoiceSourceHash | undefined; + phraseKey: PhraseKey; + singingVoiceKey: SingingVoiceKey | undefined; }; }; SET_SEQUENCE_ID_TO_PHRASE: { mutation: { - phraseKey: PhraseSourceHash; + phraseKey: PhraseKey; sequenceId: SequenceId | undefined; }; }; - SET_SINGING_GUIDE: { + SET_PHRASE_QUERY: { + mutation: { + queryKey: EditorFrameAudioQueryKey; + query: EditorFrameAudioQuery; + }; + }; + + DELETE_PHRASE_QUERY: { + mutation: { queryKey: EditorFrameAudioQueryKey }; + }; + + SET_PHRASE_SINGING_VOLUME: { mutation: { - singingGuideKey: SingingGuideSourceHash; - singingGuide: SingingGuide; + singingVolumeKey: SingingVolumeKey; + singingVolume: SingingVolume; }; }; - DELETE_SINGING_GUIDE: { - mutation: { singingGuideKey: SingingGuideSourceHash }; + DELETE_PHRASE_SINGING_VOLUME: { + mutation: { singingVolumeKey: SingingVolumeKey }; }; SELECTED_TRACK: { @@ -1066,7 +1072,7 @@ export type SingingStoreTypes = { FETCH_SING_FRAME_VOLUME: { action(palyoad: { notes: NoteForRequestToEngine[]; - frameAudioQuery: FrameAudioQuery; + query: EditorFrameAudioQuery; engineId: EngineId; styleId: StyleId; }): Promise; diff --git a/tests/unit/lib/selectPriorPhrase.spec.ts b/tests/unit/lib/selectPriorPhrase.spec.ts index 8dc7d0f4f2..7509155583 100644 --- a/tests/unit/lib/selectPriorPhrase.spec.ts +++ b/tests/unit/lib/selectPriorPhrase.spec.ts @@ -1,11 +1,11 @@ import { it, expect } from "vitest"; +import { Phrase, PhraseKey, PhraseState } from "@/store/type"; import { - Phrase, - PhraseSourceHash, - PhraseState, - phraseSourceHashSchema, -} from "@/store/type"; -import { DEFAULT_TPQN, selectPriorPhrase } from "@/sing/domain"; + DEFAULT_BPM, + DEFAULT_TPQN, + selectPriorPhrase, + tickToSecond, +} from "@/sing/domain"; import { NoteId, TrackId } from "@/type/preload"; import { uuid4 } from "@/helpers/random"; @@ -29,30 +29,20 @@ const createPhrase = ( lyric: "ド", }, ], + startTime: tickToSecond( + start * DEFAULT_TPQN - firstRestDuration * DEFAULT_TPQN, + [{ position: 0, bpm: DEFAULT_BPM }], + DEFAULT_TPQN, + ), state, }; }; -const basePhrases = new Map([ - [ - phraseSourceHashSchema.parse("1"), - createPhrase(0, 0, 1, "WAITING_TO_BE_RENDERED"), - ], - [ - phraseSourceHashSchema.parse("2"), - createPhrase(0, 1, 2, "WAITING_TO_BE_RENDERED"), - ], - [ - phraseSourceHashSchema.parse("3"), - createPhrase(0, 2, 3, "WAITING_TO_BE_RENDERED"), - ], - [ - phraseSourceHashSchema.parse("4"), - createPhrase(0, 3, 4, "WAITING_TO_BE_RENDERED"), - ], - [ - phraseSourceHashSchema.parse("5"), - createPhrase(0, 4, 5, "WAITING_TO_BE_RENDERED"), - ], +const basePhrases = new Map([ + [PhraseKey("1"), createPhrase(0, 0, 1, "WAITING_TO_BE_RENDERED")], + [PhraseKey("2"), createPhrase(0, 1, 2, "WAITING_TO_BE_RENDERED")], + [PhraseKey("3"), createPhrase(0, 2, 3, "WAITING_TO_BE_RENDERED")], + [PhraseKey("4"), createPhrase(0, 3, 4, "WAITING_TO_BE_RENDERED")], + [PhraseKey("5"), createPhrase(0, 4, 5, "WAITING_TO_BE_RENDERED")], ]); it("しっかり優先順位に従って探している", () => { From 11c5aa8d3c75ba750fc759ee2cfaa12cbf823412 Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Wed, 4 Sep 2024 23:19:45 +0900 Subject: [PATCH 3/4] =?UTF-8?q?=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB?= =?UTF-8?q?=E3=83=88=E3=82=A8=E3=83=B3=E3=82=B8=E3=83=B3=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E6=83=85=E5=A0=B1json=E3=81=AE=E4=BB=95=E6=A7=98=E3=82=92?= =?UTF-8?q?=E6=9B=B8=E3=81=8D=E3=80=81=E3=82=B9=E3=82=AD=E3=83=BC=E3=83=9E?= =?UTF-8?q?=E3=82=92=E5=AE=9A=E7=BE=A9=20(#2257)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * デフォルトエンジン更新情報jsonの仕様を書き、スキーマを定義 * mockRestoreする * 不要なTextが入ってた * 説明追加 --- ...55\350\250\210\346\226\271\351\207\235.md" | 60 ++++++++++++++ src/domain/defaultEngine.ts | 50 +++++++++++ .../unit/backend/common/configManager.spec.ts | 4 + .../defaultEngine/defaultEngine.spec.ts | 22 +++++ .../latestDefaultEngineInfos.json | 82 +++++++++++++++++++ 5 files changed, 218 insertions(+) create mode 100644 src/domain/defaultEngine.ts create mode 100644 tests/unit/domain/defaultEngine/defaultEngine.spec.ts create mode 100644 tests/unit/domain/defaultEngine/latestDefaultEngineInfos.json diff --git "a/docs/\347\264\260\343\201\213\343\201\204\350\250\255\350\250\210\346\226\271\351\207\235.md" "b/docs/\347\264\260\343\201\213\343\201\204\350\250\255\350\250\210\346\226\271\351\207\235.md" index adccd71e66..5647b95a5d 100644 --- "a/docs/\347\264\260\343\201\213\343\201\204\350\250\255\350\250\210\346\226\271\351\207\235.md" +++ "b/docs/\347\264\260\343\201\213\343\201\204\350\250\255\350\250\210\346\226\271\351\207\235.md" @@ -49,3 +49,63 @@ export type HogeFugaType = z.infer; | 追加時の処理 | zipファイルを所定のフォルダに展開 | エンジンのパスを`config.json`に保存 | | 読み込み時の処理 | 所定のフォルダ内にあるものを読む | `config.json`に保存されたパスを読む | | 削除時の処理 | 所定のフォルダ内のディレクトリを削除 | `config.json`からパスを削除 | + +## デフォルトエンジンの更新情報 + +デフォルトエンジンの更新情報をjson形式で管理しています。 +更新情報には最新のパッケージ(VVPP・VVPPPファイル)のURLやバージョンなどを記載しています。 +パッケージの情報はOS・アーキテクチャ・デバイスごとに分けています。 + +ファイルフォーマットは以下の通りです。 + +```JSONC +{ + //[number] ファイル構造バージョン(仕様変更毎にインクリメントされる) + "formatVersion": 1, + + // Windowsの情報 + "windows": { + "x64": { + "CPU": { + //[string] バージョン + "version": "x.x.x", + + // vvppやvvpppの情報 + "packages": [ + { + //[string] ダウンロードURL + "url": "https://example.com/example.vvpp", + + //[string] ファイル名 + "name": "example.vvpp", + + //[number] バイト数 + "size": 123456, + + //[string(Optional)] ハッシュ値 + "hash": "xxxxxxx", + }, + //... + ] + }, + "GPU/CPU": { /* 同上 */ } + } + }, + + "macos": { + "x64": { + "CPU": { /* 同上 */ } + }, + "arm64": { + "CPU": { /* 同上 */ } + } + }, + + "linux": { + "x64": { + "CPU": { /* 同上 */ }, + "GPU/CPU": { /* 同上 */ } + } + } +} +``` diff --git a/src/domain/defaultEngine.ts b/src/domain/defaultEngine.ts new file mode 100644 index 0000000000..2be6cbf51e --- /dev/null +++ b/src/domain/defaultEngine.ts @@ -0,0 +1,50 @@ +/** + * デフォルトエンジン関連のモジュール + */ + +import { z } from "zod"; + +/** パッケージ(vvppやvvppp1ファイル)ごとのスキーマ */ +const defaultEnginePackageSchema = z.object({ + url: z.string(), + name: z.string(), + size: z.number(), + hash: z.string().optional(), +}); + +/** デバイスごとのスキーマ */ +const defaultEngineDeviceSchema = z.object({ + version: z.string(), + packages: z.array(defaultEnginePackageSchema), +}); + +/** デフォルトエンジンの更新情報のスキーマ */ +const defaultEngineInfosSchema = z.object({ + formatVersion: z.number(), + windows: z.object({ + x64: z.object({ + CPU: defaultEngineDeviceSchema, + "GPU/CPU": defaultEngineDeviceSchema, + }), + }), + macos: z.object({ + x64: z.object({ + CPU: defaultEngineDeviceSchema, + }), + arm64: z.object({ + CPU: defaultEngineDeviceSchema, + }), + }), + linux: z.object({ + x64: z.object({ + CPU: defaultEngineDeviceSchema, + "GPU/CPU": defaultEngineDeviceSchema, + }), + }), +}); + +/** デフォルトエンジンの更新情報を取得する */ +export const fetchDefaultEngineInfos = async (url: string) => { + const response = await fetch(url); + return defaultEngineInfosSchema.parse(await response.json()); +}; diff --git a/tests/unit/backend/common/configManager.spec.ts b/tests/unit/backend/common/configManager.spec.ts index e4acd0e462..a24cfc0b5f 100644 --- a/tests/unit/backend/common/configManager.spec.ts +++ b/tests/unit/backend/common/configManager.spec.ts @@ -37,6 +37,10 @@ class TestConfigManager extends BaseConfigManager { } } +afterEach(() => { + vi.resetAllMocks(); +}); + it("新規作成できる", async () => { vi.spyOn(TestConfigManager.prototype, "exists").mockImplementation( async () => false, diff --git a/tests/unit/domain/defaultEngine/defaultEngine.spec.ts b/tests/unit/domain/defaultEngine/defaultEngine.spec.ts new file mode 100644 index 0000000000..ed883d45e4 --- /dev/null +++ b/tests/unit/domain/defaultEngine/defaultEngine.spec.ts @@ -0,0 +1,22 @@ +// テスト用のファイルを読み込むのでNode環境で実行する +// @vitest-environment node + +import path from "path"; +import fs from "fs"; +import { fetchDefaultEngineInfos } from "@/domain/defaultEngine"; + +const currentDir = "tests/unit/domain/defaultEngine"; + +test("fetchDefaultEngineInfos", async () => { + // テスト用のjsonファイルでfetchをモックする + // 元ファイルは https://raw.githubusercontent.com/VOICEVOX/voicevox_blog/master/src/generateLatestDefaultEngineInfos.ts + const p = path.resolve(currentDir, "latestDefaultEngineInfos.json"); + const json = fs.readFileSync(p, "utf-8"); + const spy = vi.spyOn(global, "fetch").mockResolvedValue(new Response(json)); + + // 読み込めることを確認 + const infos = await fetchDefaultEngineInfos("https://example.com/"); + expect(infos.formatVersion).toBe(1); + + spy.mockRestore(); +}); diff --git a/tests/unit/domain/defaultEngine/latestDefaultEngineInfos.json b/tests/unit/domain/defaultEngine/latestDefaultEngineInfos.json new file mode 100644 index 0000000000..d5cb343f27 --- /dev/null +++ b/tests/unit/domain/defaultEngine/latestDefaultEngineInfos.json @@ -0,0 +1,82 @@ +{ + "formatVersion": 1, + "windows": { + "x64": { + "CPU": { + "version": "0.20.0", + "packages": [ + { + "url": "https://github.com/VOICEVOX/voicevox_engine/releases/download/0.20.0/voicevox_engine-windows-cpu-0.20.0.vvpp", + "name": "voicevox_engine-windows-cpu-0.20.0.vvpp", + "size": 1374659234 + } + ] + }, + "GPU/CPU": { + "version": "0.20.0", + "packages": [ + { + "url": "https://github.com/VOICEVOX/voicevox_engine/releases/download/0.20.0/voicevox_engine-windows-directml-0.20.0.vvpp", + "name": "voicevox_engine-windows-directml-0.20.0.vvpp", + "size": 1382829369 + } + ] + } + } + }, + "macos": { + "x64": { + "CPU": { + "version": "0.20.0", + "packages": [ + { + "url": "https://github.com/VOICEVOX/voicevox_engine/releases/download/0.20.0/voicevox_engine-macos-x64-0.20.0.001.vvppp", + "name": "voicevox_engine-macos-x64-0.20.0.001.vvppp", + "size": 1382766014 + } + ] + } + }, + "arm64": { + "CPU": { + "version": "0.20.0", + "packages": [ + { + "url": "https://github.com/VOICEVOX/voicevox_engine/releases/download/0.20.0/voicevox_engine-macos-arm64-0.20.0.001.vvppp", + "name": "voicevox_engine-macos-arm64-0.20.0.001.vvppp", + "size": 1375008115 + } + ] + } + } + }, + "linux": { + "x64": { + "CPU": { + "version": "0.20.0", + "packages": [ + { + "url": "https://github.com/VOICEVOX/voicevox_engine/releases/download/0.20.0/voicevox_engine-linux-cpu-0.20.0.vvpp", + "name": "voicevox_engine-linux-cpu-0.20.0.vvpp", + "size": 1399437028 + } + ] + }, + "GPU/CPU": { + "version": "0.20.0", + "packages": [ + { + "url": "https://github.com/VOICEVOX/voicevox_engine/releases/download/0.20.0/voicevox_engine-linux-nvidia-0.20.0.001.vvppp", + "name": "voicevox_engine-linux-nvidia-0.20.0.001.vvppp", + "size": 1992294400 + }, + { + "url": "https://github.com/VOICEVOX/voicevox_engine/releases/download/0.20.0/voicevox_engine-linux-nvidia-0.20.0.002.vvppp", + "name": "voicevox_engine-linux-nvidia-0.20.0.002.vvppp", + "size": 645130316 + } + ] + } + } + } +} From 747f525a7bc4be8f53dc6af085b6c0d92618d091 Mon Sep 17 00:00:00 2001 From: Takusea <53995265+takusea@users.noreply.github.com> Date: Fri, 6 Sep 2024 01:04:19 +0900 Subject: [PATCH 4/4] =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=82=B8=E3=83=B3?= =?UTF-8?q?=E3=81=AE=E7=AE=A1=E7=90=86=E3=83=80=E3=82=A4=E3=82=A2=E3=83=AD?= =?UTF-8?q?=E3=82=B0=E3=81=AE=E3=83=AA=E3=83=87=E3=82=B6=E3=82=A4=E3=83=B3?= =?UTF-8?q?=20(#2255)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * BaseListItemの高さを可変に変更 * BaseToggleGroup関連コンポーネントを追加 * 親のheightの100%が伝搬できるように変更 * BaseNavigationViewコンポーネントを追加 * BaseTextFieldコンポーネントを追加 * エンジンの管理ダイアログをリデザイン * propsにdisabledを追加 * デザイン調整 * Baseコンポーネントのstoriesファイルを追加 * Storyを拡充 * チェック・非チェック時で幅が変わらないように変更 * clickイベントを追加 * 余白を調整 * 関数の指定ミスを修正 * クラスを整理 * SCSSの不要なuseを削除 * コメントを追加 --- src/components/Base/BaseButton.vue | 3 + src/components/Base/BaseListItem.vue | 4 +- .../Base/BaseNavigationView.stories.ts | 27 + src/components/Base/BaseNavigationView.vue | 46 ++ src/components/Base/BaseScrollArea.vue | 13 +- src/components/Base/BaseTextField.stories.ts | 48 ++ src/components/Base/BaseTextField.vue | 75 ++ .../Base/BaseToggleGroup.stories.ts | 47 ++ src/components/Base/BaseToggleGroup.vue | 44 ++ src/components/Base/BaseToggleGroupItem.vue | 99 +++ src/components/Dialog/EngineManageDialog.vue | 679 +++++++++--------- 11 files changed, 733 insertions(+), 352 deletions(-) create mode 100644 src/components/Base/BaseNavigationView.stories.ts create mode 100644 src/components/Base/BaseNavigationView.vue create mode 100644 src/components/Base/BaseTextField.stories.ts create mode 100644 src/components/Base/BaseTextField.vue create mode 100644 src/components/Base/BaseToggleGroup.stories.ts create mode 100644 src/components/Base/BaseToggleGroup.vue create mode 100644 src/components/Base/BaseToggleGroupItem.vue diff --git a/src/components/Base/BaseButton.vue b/src/components/Base/BaseButton.vue index fd7a09e501..2506cb023d 100644 --- a/src/components/Base/BaseButton.vue +++ b/src/components/Base/BaseButton.vue @@ -2,6 +2,7 @@