Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support manimgl and edge-tts #67

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions manim_voiceover/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
import pkg_resources

__version__: str = pkg_resources.get_distribution(__name__).version

16 changes: 15 additions & 1 deletion manim_voiceover/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,21 @@
import textwrap
from pydub import AudioSegment
from pathlib import Path
from manim import logger

import pkg_resources

# Which Manim flavour the rest of the package should import from:
# "manimce" when the `manim` distribution is installed, otherwise "manimgl".
__manimtype__: str = "manimce"

try:
    pkg_resources.get_distribution("manim")
except pkg_resources.DistributionNotFound:
    # Only a missing `manim` distribution selects the ManimGL fallback; any
    # other failure (or Ctrl-C) is allowed to surface instead of being hidden.
    __manimtype__ = "manimgl"

if __manimtype__ == "manimce":
    from manim import logger
else:
    from manimlib import logger


def chunks(lst: list, n: int):
Expand Down
7 changes: 6 additions & 1 deletion manim_voiceover/services/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
prompt_ask_missing_extras,
remove_bookmarks,
)
from manim import logger
from manim_voiceover.helper import __manimtype__

if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger

try:
import azure.cognitiveservices.speech as speechsdk
Expand Down
13 changes: 11 additions & 2 deletions manim_voiceover/services/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
import sys
import hashlib
from pathlib import Path
from manim import config, logger
from manim_voiceover.helper import __manimtype__

if __manimtype__ == "manimce":
from manim import config, logger
else:
from manimlib import config, logger

from slugify import slugify
from manim_voiceover.defaults import (
DEFAULT_VOICEOVER_CACHE_DIR,
Expand Down Expand Up @@ -72,7 +78,10 @@ def __init__(
if cache_dir is not None:
self.cache_dir = cache_dir
else:
self.cache_dir = Path(config.media_dir) / DEFAULT_VOICEOVER_CACHE_DIR
if __manimtype__ == "manimce":
self.cache_dir = Path(config.media_dir) / DEFAULT_VOICEOVER_CACHE_DIR
else:
self.cache_dir = Path(config.get_custom_config()["directories"]["output"]) / DEFAULT_VOICEOVER_CACHE_DIR

if not os.path.exists(self.cache_dir):
os.makedirs(self.cache_dir)
Expand Down
8 changes: 7 additions & 1 deletion manim_voiceover/services/coqui.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
from pathlib import Path

from manim import logger
from manim_voiceover.helper import __manimtype__

if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger

from manim_voiceover.helper import prompt_ask_missing_package, remove_bookmarks, wav2mp3
from manim_voiceover.services.base import SpeechService

Expand Down
106 changes: 106 additions & 0 deletions manim_voiceover/services/edge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from manim_voiceover.helper import __manimtype__
from manim_voiceover.services.base import SpeechService
import edge_tts

if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger

from manim_voiceover.helper import (
remove_bookmarks,
)
from pathlib import Path
import asyncio

def serialize_word_boundary(wb):
    """Convert a word-boundary record into a plain dict.

    Args:
        wb: Mapping with the keys ``"offset"``, ``"duration"``, ``"text"`` and
            ``"type"``. ``"duration"`` must be a ``datetime.timedelta``.

    Returns:
        dict: The keys ``audio_offset``, ``duration_milliseconds``,
        ``text_offset``, ``word_length``, ``text`` and ``boundary_type``.
    """
    from datetime import timedelta

    word = wb["text"]
    return {
        "audio_offset": wb["offset"],
        # Divide the *whole* duration by one millisecond. Reading only the
        # ``.microseconds`` attribute would drop the seconds/days components
        # and truncate any duration of one second or longer.
        "duration_milliseconds": wb["duration"] // timedelta(milliseconds=1),
        # NOTE(review): text_offset is filled from the same "offset" field as
        # audio_offset -- confirm this is intended.
        "text_offset": wb["offset"],
        "word_length": len(word),
        "text": word,
        "boundary_type": wb["type"],
    }

async def get_voice_file(text, voice, out_file, wb=None) -> None:
    """Stream synthesized speech for ``text`` into ``out_file``.

    Args:
        text: The text handed to ``edge_tts.Communicate``.
        voice: The voice name handed to ``edge_tts.Communicate``.
        out_file: Path of the file that receives the audio bytes; it is opened
            in binary write mode, so an existing file is overwritten.
        wb: Optional list that every ``"WordBoundary"`` chunk is appended to.
            When omitted, a fresh list is used for this call only.
    """
    # A list literal as the default would be shared by every call and keep
    # accumulating boundaries from earlier invocations.
    if wb is None:
        wb = []

    communicate = edge_tts.Communicate(text, voice)
    with open(out_file, "wb") as file:
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                file.write(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                wb.append(chunk)
                print(f"WordBoundary: {chunk}")

class EdgeService(SpeechService):
    """Speech service that synthesizes voiceovers through ``edge_tts``.

    ``style``, ``output_format`` and ``prosody`` are stored on the instance and
    recorded in the ``input_data`` of each request, but this class does not
    forward them to ``edge_tts``; only ``voice`` is used for synthesis.
    """

    def __init__(
        self,
        voice: str = "zh-CN-XiaoxiaoNeural",
        style: str = None,
        output_format: str = "Audio48Khz192KBitRateMonoMp3",
        prosody: dict = None,
        **kwargs,
    ):
        """Store the voice settings and initialize the base service.

        Args:
            voice: Voice name passed to ``edge_tts`` for synthesis.
            style: Stored and recorded in ``input_data`` only.
            output_format: Stored and recorded in ``input_data`` only.
            prosody: Stored and recorded in ``input_data`` only.
            **kwargs: Forwarded unchanged to ``SpeechService.__init__``.
        """
        self.voice = voice
        self.style = style
        self.output_format = output_format
        self.prosody = prosody
        SpeechService.__init__(self, **kwargs)

    def generate_from_text(
        self, text: str, cache_dir: str = None, path: str = None, **kwargs
    ) -> dict:
        """Synthesize ``text`` to an mp3 file and describe the result.

        Args:
            text: Text to speak; it may contain bookmark tags.
            cache_dir: Directory the audio file is written to. Defaults to
                ``self.cache_dir``.
            path: File name for the audio, relative to ``cache_dir``. Defaults
                to ``self.get_audio_basename(input_data) + ".mp3"``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            dict: The cached result when ``get_cached_result`` finds one,
            otherwise a dict with ``input_text``, ``input_data``,
            ``original_audio`` and ``word_boundaries``.
        """
        # Bookmark tags are markup for the caller, not words to be spoken, so
        # the synthesizer receives the text with them stripped out.
        input_text = remove_bookmarks(text)
        if cache_dir is None:
            cache_dir = self.cache_dir

        input_data = {
            "input_text": text,
            "service": "edge",
            "config": {
                "voice": self.voice,
                "style": self.style,
                "output_format": self.output_format,
                "prosody": self.prosody,
            },
        }

        cached_result = self.get_cached_result(input_data, cache_dir)
        if cached_result is not None:
            return cached_result

        if path is None:
            audio_path = self.get_audio_basename(input_data) + ".mp3"
        else:
            audio_path = path

        raw_boundaries = []
        # asyncio.run owns a fresh event loop for this call, instead of relying
        # on the deprecated implicit loop creation of get_event_loop().
        asyncio.run(
            get_voice_file(
                input_text,
                self.voice,
                str(Path(cache_dir) / audio_path),
                raw_boundaries,
            )
        )

        word_boundaries = []
        cursor = 0  # position in input_text just past the previous word
        for wb in raw_boundaries:
            word = wb["text"]
            # Locate the word in the spoken text so that whitespace and
            # punctuation between words are counted. If the reported word
            # cannot be found, fall back to the running position.
            found = input_text.find(word, cursor)
            start = cursor if found == -1 else found
            word_boundaries.append(
                {
                    "audio_offset": wb["offset"],
                    # NOTE(review): the raw "duration" value is stored as-is;
                    # confirm its unit really is milliseconds.
                    "duration_milliseconds": wb["duration"],
                    "text_offset": start,
                    "word_length": len(word),
                    "text": word,
                    "boundary_type": wb["type"],
                }
            )
            cursor = start + len(word)

        return {
            "input_text": text,
            "input_data": input_data,
            "original_audio": audio_path,
            "word_boundaries": word_boundaries,
        }
8 changes: 7 additions & 1 deletion manim_voiceover/services/gtts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from pathlib import Path
from manim import logger
from manim_voiceover.helper import __manimtype__

if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger

from manim_voiceover.helper import prompt_ask_missing_extras, remove_bookmarks

try:
Expand Down
8 changes: 7 additions & 1 deletion manim_voiceover/services/pyttsx3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from pathlib import Path
from manim import logger
from manim_voiceover.helper import __manimtype__

if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger

from manim_voiceover.helper import prompt_ask_missing_extras

try:
Expand Down
6 changes: 5 additions & 1 deletion manim_voiceover/services/recorder/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from pathlib import Path
from manim_voiceover.helper import __manimtype__
from manim_voiceover.helper import msg_box, prompt_ask_missing_extras, remove_bookmarks

from manim_voiceover.services.base import SpeechService
from manim import logger

if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger
try:
import pyaudio
from manim_voiceover.services.recorder.utility import Recorder
Expand Down
6 changes: 5 additions & 1 deletion manim_voiceover/services/recorder/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
import sched
from pathlib import Path
from pydub import AudioSegment
from manim import logger
from manim_voiceover.helper import __manimtype__
if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger

from manim_voiceover.helper import trim_silence, wav2mp3

Expand Down
25 changes: 20 additions & 5 deletions manim_voiceover/tracker.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from pathlib import Path
import re
import numpy as np
from manim import logger
from manim_voiceover.helper import __manimtype__

if __manimtype__ == "manimce":
from manim import logger
from manim import Scene
else:
from manimlib import logger
from manimlib import Scene

from typing import Optional, List
from scipy.interpolate import interp1d

from manim import Scene
from manim_voiceover.modify_audio import get_duration
from manim_voiceover.helper import remove_bookmarks

Expand Down Expand Up @@ -48,7 +54,10 @@ def __init__(self, scene: Scene, data: dict, cache_dir: str):
self.cache_dir = cache_dir
self.duration = get_duration(Path(cache_dir) / self.data["final_audio"])
# last_t = scene.last_t
last_t = scene.renderer.time
if __manimtype__ == "manimce":
last_t = scene.renderer.time
else:
last_t = scene.time
if last_t is None:
last_t = 0
self.start_t = last_t
Expand Down Expand Up @@ -97,7 +106,10 @@ def get_remaining_duration(self, buff: float = 0.0) -> float:
int: The remaining duration of the voiceover in seconds.
"""
# result= max(self.end_t - self.scene.last_t, 0)
result = max(self.end_t - self.scene.renderer.time + buff, 0)
if __manimtype__ == "manimce":
result = max(self.end_t - self.scene.renderer.time + buff, 0)
else:
result = max(self.end_t - self.scene.time + buff, 0)
# print(result)
return result

Expand Down Expand Up @@ -128,7 +140,10 @@ def time_until_bookmark(
self._check_bookmarks()
if not mark in self.bookmark_times:
raise Exception("There is no <bookmark mark='%s' />" % mark)
result = max(self.bookmark_times[mark] - self.scene.renderer.time + buff, 0)
if __manimtype__ == "manimce":
result = max(self.bookmark_times[mark] - self.scene.renderer.time + buff, 0)
else:
result = max(self.bookmark_times[mark] - self.scene.time + buff, 0)
if limit is not None:
result = min(limit, result)
return result
7 changes: 6 additions & 1 deletion manim_voiceover/translate/gettext_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
import os
import typing as t

from manim import logger
from manim_voiceover.helper import __manimtype__

if __manimtype__ == "manimce":
from manim import logger
else:
from manimlib import logger

from manim_voiceover.helper import prompt_ask_missing_extras

Expand Down
Loading