Skip to content

Commit

Permalink
追加: 文内無音倍率 (VOICEVOX#2352)
Browse files Browse the repository at this point in the history
Co-authored-by: Hiroshiba <[email protected]>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Nov 15, 2024
1 parent 7043359 commit 258c791
Show file tree
Hide file tree
Showing 12 changed files with 149 additions and 19 deletions.
11 changes: 11 additions & 0 deletions src/backend/common/ConfigManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,17 @@ const migrations: [string, (store: Record<string, unknown>) => unknown][] = [
return config;
},
],
[
  ">=0.22",
  (config) => {
    // Add the in-sentence pause length scale (pauseLengthScale) to every
    // stored preset.
    const presets = config.presets as ConfigType["presets"];
    for (const preset of Object.values(presets.items)) {
      if (preset == undefined) throw new Error("preset == undefined");
      // 1 is the neutral multiplier: pause lengths stay unchanged.
      preset.pauseLengthScale = 1;
    }

    // Return the migrated store so this entry has a defined result instead
    // of an implicit `undefined`.
    return config;
  },
],
];

export type Metadata = {
Expand Down
18 changes: 18 additions & 0 deletions src/components/Talk/AudioInfo.vue
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,24 @@ const parameterConfigs = computed<ParameterConfig[]>(() => [
}),
key: "volumeScale",
},
{
  // Slider entry for the in-sentence pause length scale.
  label: "文内無音倍率",
  sliderProps: {
    // Value of the current query, or null when no value is available.
    modelValue: () => {
      const scale = query.value?.pauseLengthScale;
      return scale ?? null;
    },
    disable: () => uiLocked.value,
    max: SLIDER_PARAMETERS.PAUSE_LENGTH_SCALE.max,
    min: SLIDER_PARAMETERS.PAUSE_LENGTH_SCALE.min,
    step: SLIDER_PARAMETERS.PAUSE_LENGTH_SCALE.step,
    scrollStep: SLIDER_PARAMETERS.PAUSE_LENGTH_SCALE.scrollStep,
    scrollMinStep: SLIDER_PARAMETERS.PAUSE_LENGTH_SCALE.scrollMinStep,
  },
  // Apply the new scale to every currently selected audio item.
  onChange: (pauseLengthScale: number) => {
    return store.actions.COMMAND_MULTI_SET_AUDIO_PAUSE_LENGTH_SCALE({
      audioKeys: selectedAudioKeys.value,
      pauseLengthScale: pauseLengthScale,
    });
  },
  key: "pauseLengthScale",
},
{
label: "開始無音",
sliderProps: {
Expand Down
8 changes: 8 additions & 0 deletions src/domain/project/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ export const migrateProjectFileObject = async (
for (const audioItemsKey in projectData.audioItems) {
if (projectData.audioItems[audioItemsKey].query != null) {
projectData.audioItems[audioItemsKey].query.volumeScale = 1;
projectData.audioItems[audioItemsKey].query.pauseLengthScale = 1;
projectData.audioItems[audioItemsKey].query.prePhonemeLength = 0.1;
projectData.audioItems[audioItemsKey].query.postPhonemeLength = 0.1;
projectData.audioItems[audioItemsKey].query.outputSamplingRate =
Expand Down Expand Up @@ -302,6 +303,13 @@ export const migrateProjectFileObject = async (
projectData.song.trackOrder = Object.keys(newTracks);
}

if (semver.satisfies(projectAppVersion, "<0.22.0", semverSatisfiesOptions)) {
  // Add the in-sentence pause length scale to projects saved before 0.22.0.
  for (const audioItemsKey in projectData.talk.audioItems) {
    const query = projectData.talk.audioItems[audioItemsKey].query;
    // An audio item may have no query; skip it instead of throwing on the
    // property write below.
    if (query == null) continue;
    // 1 is the neutral multiplier: existing pause lengths are kept.
    query.pauseLengthScale = 1;
  }
}

// Validation check
// トークはvalidateTalkProjectで検証する
// ソングはSET_SCOREの中の`isValidScore`関数で検証される
Expand Down
1 change: 1 addition & 0 deletions src/domain/project/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const audioQuerySchema = z.object({
pitchScale: z.number(),
intonationScale: z.number(),
volumeScale: z.number(),
pauseLengthScale: z.number(),
prePhonemeLength: z.number(),
postPhonemeLength: z.number(),
outputSamplingRate: z.union([z.number(), z.literal("engineDefault")]),
Expand Down
75 changes: 63 additions & 12 deletions src/store/audio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
AudioCommandStoreTypes,
transformCommandStore,
FetchAudioResult,
EditorAudioQuery,
} from "./type";
import {
buildAudioFileNameFromRawData,
Expand All @@ -34,6 +35,7 @@ import {
isMorphable,
} from "./audioGenerate";
import { ContinuousPlayer } from "./audioContinuousPlayer";
import { convertAudioQueryFromEngineToEditor } from "./proxy";
import {
convertHiraToKana,
convertLongVowel,
Expand Down Expand Up @@ -739,6 +741,8 @@ export const audioStore = createPartialStore<AudioStoreTypes>({
baseAudioItem.query.prePhonemeLength;
newAudioItem.query.postPhonemeLength =
baseAudioItem.query.postPhonemeLength;
newAudioItem.query.pauseLengthScale =
baseAudioItem.query.pauseLengthScale;
newAudioItem.query.outputSamplingRate =
baseAudioItem.query.outputSamplingRate;
newAudioItem.query.outputStereo = baseAudioItem.query.outputStereo;
Expand Down Expand Up @@ -891,6 +895,23 @@ export const audioStore = createPartialStore<AudioStoreTypes>({
},
},

// Stores a new in-sentence pause length scale on one audio item's query.
SET_AUDIO_PAUSE_LENGTH_SCALE: {
  mutation(
    state,
    payload: { audioKey: AudioKey; pauseLengthScale: number },
  ) {
    const { query } = state.audioItems[payload.audioKey];
    // The scale lives on the query, so an item without one cannot be updated.
    if (query == undefined) throw new Error("query == undefined");
    query.pauseLengthScale = payload.pauseLengthScale;
  },
},

SET_AUDIO_PRE_PHONEME_LENGTH: {
mutation(
state,
Expand Down Expand Up @@ -949,13 +970,16 @@ export const audioStore = createPartialStore<AudioStoreTypes>({
// Replaces the whole query of one audio item with an editor-side query.
SET_AUDIO_QUERY: {
  mutation(
    state,
    {
      audioKey,
      audioQuery,
    }: { audioKey: AudioKey; audioQuery: EditorAudioQuery },
  ) {
    state.audioItems[audioKey].query = audioQuery;
  },
  // The action only forwards its payload to the mutation of the same name.
  action(
    { mutations },
    payload: { audioKey: AudioKey; audioQuery: EditorAudioQuery },
  ) {
    mutations.SET_AUDIO_QUERY(payload);
  },
},
Expand All @@ -974,11 +998,13 @@ export const audioStore = createPartialStore<AudioStoreTypes>({
.INSTANTIATE_ENGINE_CONNECTOR({
engineId,
})
.then((instance) =>
instance.invoke("audioQueryAudioQueryPost")({
text,
speaker: styleId,
}),
.then(async (instance) =>
convertAudioQueryFromEngineToEditor(
await instance.invoke("audioQueryAudioQueryPost")({
text,
speaker: styleId,
}),
),
)
.catch((error) => {
window.backend.logError(
Expand Down Expand Up @@ -1271,7 +1297,9 @@ export const audioStore = createPartialStore<AudioStoreTypes>({
length += m.consonantLength != undefined ? m.consonantLength : 0;
length += m.vowelLength;
});
length += phrase.pauseMora ? phrase.pauseMora.vowelLength : 0;
length += phrase.pauseMora
? phrase.pauseMora.vowelLength * query.pauseLengthScale
: 0;
// post phoneme lengthは最後のアクセント句の一部として扱う
if (i === accentPhrases.length - 1) {
length += query.postPhonemeLength;
Expand Down Expand Up @@ -1919,7 +1947,7 @@ export const audioCommandStore = transformCommandStore(
payload: { audioKey: AudioKey; text: string } & (
| { update: "Text" }
| { update: "AccentPhrases"; accentPhrases: AccentPhrase[] }
| { update: "AudioQuery"; query: AudioQuery }
| { update: "AudioQuery"; query: EditorAudioQuery }
),
) {
audioStore.mutations.SET_AUDIO_TEXT(draft, {
Expand Down Expand Up @@ -2025,7 +2053,7 @@ export const audioCommandStore = transformCommandStore(
}
| {
update: "AudioQuery";
query: AudioQuery;
query: EditorAudioQuery;
}
| {
update: "OnlyVoice";
Expand Down Expand Up @@ -2089,7 +2117,7 @@ export const audioCommandStore = transformCommandStore(
}
| {
update: "AudioQuery";
query: AudioQuery;
query: EditorAudioQuery;
}
| {
update: "OnlyVoice";
Expand All @@ -2100,7 +2128,7 @@ export const audioCommandStore = transformCommandStore(
try {
const audioItem = state.audioItems[audioKey];
if (audioItem.query == undefined) {
const query: AudioQuery = await actions.FETCH_AUDIO_QUERY({
const query = await actions.FETCH_AUDIO_QUERY({
text: audioItem.text,
engineId: voice.engineId,
styleId: voice.styleId,
Expand Down Expand Up @@ -2711,6 +2739,29 @@ export const audioCommandStore = transformCommandStore(
},
},

// Command that applies one pause length scale to several audio items.
COMMAND_MULTI_SET_AUDIO_PAUSE_LENGTH_SCALE: {
  mutation(
    draft,
    {
      audioKeys,
      pauseLengthScale,
    }: {
      audioKeys: AudioKey[];
      pauseLengthScale: number;
    },
  ) {
    // Reuse the single-item mutation once per key.
    for (const audioKey of audioKeys) {
      audioStore.mutations.SET_AUDIO_PAUSE_LENGTH_SCALE(draft, {
        audioKey,
        pauseLengthScale,
      });
    }
  },
  // The action only forwards its payload to the mutation of the same name.
  action(
    context,
    payload: { audioKeys: AudioKey[]; pauseLengthScale: number },
  ) {
    context.mutations.COMMAND_MULTI_SET_AUDIO_PAUSE_LENGTH_SCALE(payload);
  },
},

COMMAND_MULTI_SET_AUDIO_PRE_PHONEME_LENGTH: {
mutation(
draft,
Expand Down
6 changes: 5 additions & 1 deletion src/store/audioGenerate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,11 @@ export async function generateLabFromAudioQuery(
});
if (accentPhrase.pauseMora != undefined) {
labString += timestamp.toFixed() + " ";
timestamp += (accentPhrase.pauseMora.vowelLength * 10000000) / speedScale;
timestamp +=
(accentPhrase.pauseMora.vowelLength *
audioQuery.pauseLengthScale *
10000000) /
speedScale;
labString += timestamp.toFixed() + " ";
labString += accentPhrase.pauseMora.vowel + "\n";
}
Expand Down
1 change: 1 addition & 0 deletions src/store/preset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ export const presetStore = createPartialStore<PresetStoreTypes>({
pitchScale: 0.0,
intonationScale: 1.0,
volumeScale: 1.0,
pauseLengthScale: 1,
prePhonemeLength: 0.1,
postPhonemeLength: 0.1,
};
Expand Down
11 changes: 11 additions & 0 deletions src/store/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const proxyStoreCreator = (_engineFactory: IEngineConnectorFactory) => {
return proxyStore;
};

/** AudioQueryをエンジン用に変換する */
export const convertAudioQueryFromEditorToEngine = (
editorAudioQuery: EditorAudioQuery,
defaultOutputSamplingRate: number,
Expand All @@ -56,4 +57,14 @@ export const convertAudioQueryFromEditorToEngine = (
};
};

/**
 * Converts an engine AudioQuery into the editor's AudioQuery.
 *
 * Every field is copied as-is; a missing (null/undefined) `pauseLengthScale`
 * is filled in with 1. The input object is not modified.
 */
export const convertAudioQueryFromEngineToEditor = (
  engineAudioQuery: AudioQuery,
): EditorAudioQuery => {
  const pauseLengthScale = engineAudioQuery.pauseLengthScale ?? 1;
  return { ...engineAudioQuery, pauseLengthScale };
};

export const proxyStore = proxyStoreCreator(OpenAPIEngineConnectorFactory);
25 changes: 19 additions & 6 deletions src/store/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,12 @@ import {
/**
 * AudioQuery as used by the editor.
 *
 * Differs from the engine's AudioQuery in two fields:
 * - `outputSamplingRate` may also be the literal "engineDefault".
 * - `pauseLengthScale` is required here.
 */
export type EditorAudioQuery = Omit<
  AudioQuery,
  "outputSamplingRate" | "pauseLengthScale"
> & {
  outputSamplingRate: number | "engineDefault";
  pauseLengthScale: number; // required, unlike in the engine's AudioQuery
};

export type AudioItem = {
Expand Down Expand Up @@ -290,6 +294,10 @@ export type AudioStoreTypes = {
mutation: { audioKey: AudioKey; volumeScale: number };
};

// Mutation payload: the target audio item key and the new pause length scale.
SET_AUDIO_PAUSE_LENGTH_SCALE: {
mutation: { audioKey: AudioKey; pauseLengthScale: number };
};

// Mutation payload: the target audio item key and the new prePhonemeLength value.
SET_AUDIO_PRE_PHONEME_LENGTH: {
mutation: { audioKey: AudioKey; prePhonemeLength: number };
};
Expand Down Expand Up @@ -329,16 +337,16 @@ export type AudioStoreTypes = {
};

// Mutation and action share one payload: the audio item key and the
// editor-side query to store for it.
SET_AUDIO_QUERY: {
  mutation: { audioKey: AudioKey; audioQuery: EditorAudioQuery };
  action(payload: { audioKey: AudioKey; audioQuery: EditorAudioQuery }): void;
};

// Action that takes text, engine id and style id and resolves to an
// editor-side query.
FETCH_AUDIO_QUERY: {
  action(payload: {
    text: string;
    engineId: EngineId;
    styleId: StyleId;
  }): Promise<EditorAudioQuery>;
};

SET_AUDIO_VOICE: {
Expand Down Expand Up @@ -506,7 +514,7 @@ export type AudioCommandStoreTypes = {
mutation: { audioKey: AudioKey; text: string } & (
| { update: "Text" }
| { update: "AccentPhrases"; accentPhrases: AccentPhrase[] }
| { update: "AudioQuery"; query: AudioQuery }
| { update: "AudioQuery"; query: EditorAudioQuery }
);
action(payload: { audioKey: AudioKey; text: string }): void;
};
Expand All @@ -522,7 +530,7 @@ export type AudioCommandStoreTypes = {
}
| {
update: "AudioQuery";
query: AudioQuery;
query: EditorAudioQuery;
}
| {
update: "OnlyVoice";
Expand Down Expand Up @@ -627,6 +635,11 @@ export type AudioCommandStoreTypes = {
action(payload: { audioKeys: AudioKey[]; volumeScale: number }): void;
};

// Mutation and action share one payload: the audio item keys to update and
// the pause length scale to give them.
COMMAND_MULTI_SET_AUDIO_PAUSE_LENGTH_SCALE: {
mutation: { audioKeys: AudioKey[]; pauseLengthScale: number };
action(payload: { audioKeys: AudioKey[]; pauseLengthScale: number }): void;
};

// Mutation and action share one payload: the audio item keys to update and
// the prePhonemeLength value to give them.
COMMAND_MULTI_SET_AUDIO_PRE_PHONEME_LENGTH: {
mutation: { audioKeys: AudioKey[]; prePhonemeLength: number };
action(payload: { audioKeys: AudioKey[]; prePhonemeLength: number }): void;
Expand Down
10 changes: 10 additions & 0 deletions src/store/utility.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@ export const SLIDER_PARAMETERS = {
scrollStep: () => 0.1,
scrollMinStep: () => 0.01,
},
/**
* Slider parameter definition for the in-sentence pause length scale.
* Each entry is a function returning a constant: range 0 to 2, step 0.01.
* NOTE(review): scrollStep / scrollMinStep presumably apply to mouse-wheel
* adjustment of the slider — confirm against the slider implementation.
*/
PAUSE_LENGTH_SCALE: {
max: () => 2,
min: () => 0,
step: () => 0.01,
scrollStep: () => 0.1,
scrollMinStep: () => 0.01,
},
/**
* モーフィングレートパラメータの定義
*/
Expand Down
2 changes: 2 additions & 0 deletions src/type/preload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ export type Preset = {
pitchScale: number;
intonationScale: number;
volumeScale: number;
pauseLengthScale: number;
prePhonemeLength: number;
postPhonemeLength: number;
morphingInfo?: MorphingInfo;
Expand Down Expand Up @@ -643,6 +644,7 @@ export const configSchema = z
pitchScale: z.number(),
intonationScale: z.number(),
volumeScale: z.number(),
pauseLengthScale: z.number(),
prePhonemeLength: z.number(),
postPhonemeLength: z.number(),
morphingInfo: z
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 258c791

Please sign in to comment.