From 37c17cafc6648ad9203344b6546fb9bb9d85dca2 Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Sat, 24 Aug 2024 19:58:24 +0900 Subject: [PATCH] =?UTF-8?q?=E3=81=9F=E3=81=B6=E3=82=93sing=20engine?= =?UTF-8?q?=E3=81=AE=E3=83=A2=E3=83=83=E3=82=AF=E3=81=8C=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=81=9F=E3=80=81SingEditor=E3=81=AEstories=E3=82=92=E4=BD=9C?= =?UTF-8?q?=E3=81=A3=E3=81=A6=E8=A9=A6=E3=81=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mock/engineMock/index.ts | 54 ++++++++++++++++++- src/mock/engineMock/singModelMock.ts | 79 ++++++++++++++++++++++++---- src/mock/engineMock/talkModelMock.ts | 14 ++--- 3 files changed, 127 insertions(+), 20 deletions(-) diff --git a/src/mock/engineMock/index.ts b/src/mock/engineMock/index.ts index 8a3af1a963..03d97d3f1f 100644 --- a/src/mock/engineMock/index.ts +++ b/src/mock/engineMock/index.ts @@ -8,7 +8,13 @@ import { replacePitchMock, tokensToActtentPhrasesMock, } from "./talkModelMock"; +import { + notesAndFramePhonemesAndPitchToVolumeMock, + notesAndFramePhonemesToPitchMock, + notesToFramePhonemesMock, +} from "./singModelMock"; +import { cloneWithUnwrapProxy } from "@/helpers/cloneWithUnwrapProxy"; import { IEngineConnectorFactory } from "@/infrastructures/EngineConnector"; import { AccentPhrase, @@ -20,13 +26,13 @@ import { FrameAudioQuery, MoraDataMoraDataPostRequest, SingFrameAudioQuerySingFrameAudioQueryPostRequest, + SingFrameVolumeSingFrameVolumePostRequest, Speaker, SpeakerInfo, SpeakerInfoSpeakerInfoGetRequest, SupportedDevicesInfo, SynthesisSynthesisPostRequest, } from "@/openapi"; -import { cloneWithUnwrapProxy } from "@/helpers/cloneWithUnwrapProxy"; export const dicPath = "engineMock/dict"; export const assetsPath = "engineMock/assets"; @@ -156,7 +162,51 @@ export function createOpenAPIEngineMock(): IEngineConnectorFactory { async singFrameAudioQuerySingFrameAudioQueryPost( payload: SingFrameAudioQuerySingFrameAudioQueryPostRequest, - ): Promise {}, + ): Promise { + const { score, speaker: styleId } = payload; + + const phonemes = notesToFramePhonemesMock(score.notes, styleId); + const f0 = notesAndFramePhonemesToPitchMock( + score.notes, + phonemes, + styleId, + ); + const volume = notesAndFramePhonemesAndPitchToVolumeMock( + score.notes, + phonemes, + f0, + styleId, + ); + + return { + f0, + volume, + phonemes, + volumeScale: 1.0, + outputSamplingRate: 44100, + outputStereo: false, + }; + }, + + async singFrameVolumeSingFrameVolumePost( + payload: SingFrameVolumeSingFrameVolumePostRequest, + ): Promise> { + const { + speaker: stlyeId, + bodySingFrameVolumeSingFrameVolumePost: { + score, + frameAudioQuery, + }, + } = payload; + + const volume = notesAndFramePhonemesAndPitchToVolumeMock( + score.notes, + frameAudioQuery.phonemes, + frameAudioQuery.f0, + stlyeId, + ); + return volume; + }, }; } diff --git a/src/mock/engineMock/singModelMock.ts b/src/mock/engineMock/singModelMock.ts index 518eee5e5b..1c344783a9 100644 --- a/src/mock/engineMock/singModelMock.ts +++ b/src/mock/engineMock/singModelMock.ts @@ -4,8 +4,9 @@ */ import { moraToPhonemes } from "./phonemeMock"; -import { moraPattern, convertHiraToKana } from "@/domain/japanese"; +import { convertHiraToKana } from "@/domain/japanese"; import { Note, FramePhoneme } from "@/openapi"; +import { noteNumberToFrequency } from "@/sing/domain"; /** アルファベット文字列を適当な0~1の適当な数値に変換する */ function alphabetsToNumber(text: string): number { @@ -21,16 +22,27 @@ function phonemeToLengthMock(phoneme: string): number { /** 揺れ幅が-30cent~30centになるように適当なピッチを決める */ function phonemeAndKeyToPitchMock(phoneme: string, key: number): number { - const base = 440 * Math.pow(2, (key - 69) / 12); + const base = noteNumberToFrequency(key); const shift = (-30 + 60 * alphabetsToNumber(phoneme)) / 1200; return base * Math.pow(2, shift); } +/** 0.8~1.0になるような適当な音量を決める */ +function phonemeAndPitchToVolumeMock(phoneme: string, pitch: number): number { + const minPitch = noteNumberToFrequency(1); + const maxPitch = noteNumberToFrequency(128); + const normalized = (pitch - minPitch) / (maxPitch - minPitch); + return 0.75 + normalized * 0.2 + alphabetsToNumber(phoneme) * 0.05; +} + /** * ノートから音素と適当な音素長を作成する。 * 母音の開始位置をノートの開始位置は一致させ、子音は前のノートに食い込むようにする。 */ -function notesToFramePhonemesMock(notes: Note[]): FramePhoneme[] { +export function notesToFramePhonemesMock( + notes: Note[], + styleId: number, +): FramePhoneme[] { const framePhonemes: FramePhoneme[] = []; for (const note of notes) { const phonemes = moraToPhonemes[convertHiraToKana(note.lyric)]; @@ -43,6 +55,9 @@ function notesToFramePhonemesMock(notes: Note[]): FramePhoneme[] { // 子音は適当な長さ let consonantLength = phonemeToLengthMock(consonant); + // 別の歌手で同じにならないように適当に値をずらす + consonantLength += styleId * 0.03; + // 子音の長さが前のノートの長さ以上になる場合、子音の長さをノートの半分にする const beforeFramePhoneme = framePhonemes[framePhonemes.length - 1]; if (beforeFramePhoneme.frameLength < consonantLength) { @@ -63,15 +78,57 @@ function notesToFramePhonemesMock(notes: Note[]): FramePhoneme[] { } /** ノートと音素長から適当なピッチを作成する */ -function notesAndFramePhonemesToPitchMock( +export function notesAndFramePhonemesToPitchMock( notes: Note[], framePhonemes: FramePhoneme[], + styleId: number, ): number[] { - // こんな感じ - // const f0 = moras.flatMap((mora, i) => - // Array(framePerMora[i]).fill( - // mora.pitch == 0 ? 0 : Math.exp(mora.pitch), - // ), - // ); - // note idがほしい! + return framePhonemes.flatMap((phoneme, i) => { + // IDが同じノートを探す + const note = notes + .filter((note) => note.id != undefined) + .find((note) => note.id == phoneme.noteId); + if (note == undefined) + throw new Error(`ノートが見つかりません: ${i} ${phoneme.phoneme}`); + + let pitch; + if (note.key != undefined) { + pitch = note.key = phonemeAndKeyToPitchMock(phoneme.phoneme, note.key); + + // 別の歌手で同じにならないように適当に値をずらす + pitch *= 1 + styleId * 0.03; + } else { + pitch = 0; + } + + return Array(phoneme.frameLength).fill(pitch); + }); +} + +/** + * ノートと音素長とピッチから適当な音量を作成する。 + * ピッチが高いほど音量が大きくなるようにする。 + * NOTE: ノートは一旦無視している。 + */ +export function notesAndFramePhonemesAndPitchToVolumeMock( + notes: Note[], + framePhonemes: FramePhoneme[], + f0: number[], + styleId: number, +): number[] { + const phonemePerFrame = framePhonemes.flatMap((phoneme) => + Array(phoneme.frameLength).fill(phoneme.phoneme), + ); + + return Array(f0.length).map((_, i) => { + const phoneme = phonemePerFrame[i]; + const pitch = f0[i]; + + let volume = phonemeAndPitchToVolumeMock(phoneme, pitch); + + // 別の歌手で同じにならないように適当に値をずらす + volume *= 1 - styleId * 0.03; + + return volume; + }); } diff --git a/src/mock/engineMock/talkModelMock.ts b/src/mock/engineMock/talkModelMock.ts index f38425b14f..b3e2aec4a1 100644 --- a/src/mock/engineMock/talkModelMock.ts +++ b/src/mock/engineMock/talkModelMock.ts @@ -60,7 +60,7 @@ function textToAccentPhraseMock(text: string): AccentPhrase { */ export function replaceLengthMock( accentPhrases: AccentPhrase[], - speaker: number, + styleId: number, ) { for (const accentPhrase of accentPhrases) { for (let i = 0; i < accentPhrase.moras.length; i++) { @@ -78,7 +78,7 @@ export function replaceLengthMock( // 別のアクセント句や話者で同じにならないように適当に値をずらす for (let i = 0; i < accentPhrases.length; i++) { - const diff = i * 0.01 + speaker * 0.03; + const diff = i * 0.01 + styleId * 0.03; const accentPhrase = accentPhrases[i]; for (const mora of accentPhrase.moras) { if (mora.consonantLength != undefined) mora.consonantLength += diff; @@ -96,7 +96,7 @@ export function replaceLengthMock( */ export function replacePitchMock( accentPhrases: AccentPhrase[], - speaker: number, + styleId: number, ) { for (const accentPhrase of accentPhrases) { for (let i = 0; i < accentPhrase.moras.length; i++) { @@ -118,7 +118,7 @@ export function replacePitchMock( // 別のアクセント句や話者で同じにならないように適当に値をずらす for (let i = 0; i < accentPhrases.length; i++) { - const diff = i * 0.01 + speaker * 0.03; + const diff = i * 0.01 + styleId * 0.03; const accentPhrase = accentPhrases[i]; for (const mora of accentPhrase.moras) { if (mora.pitch > 0) mora.pitch += diff; @@ -133,7 +133,7 @@ export function replacePitchMock( */ export function tokensToActtentPhrasesMock( tokens: IpadicFeatures[], - speaker: number, + styleId: number, ) { const accentPhrases: AccentPhrase[] = []; let textPhrase = ""; @@ -186,8 +186,8 @@ export function tokensToActtentPhrasesMock( } // 長さとピッチを代入 - replaceLengthMock(accentPhrases, speaker); - replacePitchMock(accentPhrases, speaker); + replaceLengthMock(accentPhrases, styleId); + replacePitchMock(accentPhrases, styleId); return accentPhrases; }