From cf30ffb6609ddfe9dd50d3ef07eece355cca93c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 16:34:01 +0200
Subject: [PATCH 1/9] make pyproject find packages automatically

---
 moshi/pyproject.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/moshi/pyproject.toml b/moshi/pyproject.toml
index 4b77729..662cbea 100644
--- a/moshi/pyproject.toml
+++ b/moshi/pyproject.toml
@@ -23,10 +23,6 @@ dynamic = ["version"]
 requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
-[tool.setuptools]
-packages = ["moshi", "moshi.utils", "moshi.modules", "moshi.models", "moshi.quantization"]
-
-
 [project.optional-dependencies]
 dev = [
     "pyright",

From 0fb83ff3277c708a7cedb955e41ba6a865dd9e22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 16:59:08 +0200
Subject: [PATCH 2/9] disable CUDA graphs when not running on CUDA

---
 moshi/moshi/models/compression.py |  6 ++--
 moshi/moshi/models/lm.py          |  5 +--
 moshi/moshi/utils/compile.py      |  9 +++--
 moshi/pyproject.toml              |  2 ++
 scripts/mimi_streaming_test.py    | 58 +++++++++++++++++--------------
 scripts/setup.cfg                 |  1 +
 6 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/moshi/moshi/models/compression.py b/moshi/moshi/models/compression.py
index 9cf9996..9c271ca 100644
--- a/moshi/moshi/models/compression.py
+++ b/moshi/moshi/models/compression.py
@@ -220,12 +220,14 @@ def __init__(
         )
 
     def _init_streaming_state(self, batch_size: int) -> _MimiState:
+        device = next(self.parameters()).device
+        disable = device.type != 'cuda'
         graphed_tr_dec = None
         graphed_tr_enc = None
         if self.encoder_transformer is not None:
-            graphed_tr_enc = CUDAGraphed(self.encoder_transformer)
+            graphed_tr_enc = CUDAGraphed(self.encoder_transformerd, disable=disable)
         if self.decoder_transformer is not None:
-            graphed_tr_dec = CUDAGraphed(self.decoder_transformer)
+            graphed_tr_dec = CUDAGraphed(self.decoder_transformerd, disable=disable)
         return _MimiState(graphed_tr_enc, graphed_tr_dec)
 
     @property
diff --git a/moshi/moshi/models/lm.py b/moshi/moshi/models/lm.py
index 58ad922..8cca181 100644
--- a/moshi/moshi/models/lm.py
+++ b/moshi/moshi/models/lm.py
@@ -372,8 +372,9 @@ def _init_streaming_state(self, batch_size: int) -> _LMGenState:
             dtype=torch.long,
         )
 
-        graphed_main = CUDAGraphed(lm_model.forward_text)
-        graphed_depth = CUDAGraphed(self.depformer_step)
+        disable = lm_model.device.type != 'cuda'
+        graphed_main = CUDAGraphed(lm_model.forward_text, disable=disable)
+        graphed_depth = CUDAGraphed(self.depformer_step, disable=disable)
 
         return _LMGenState(cache, initial, graphed_main, graphed_depth)
diff --git a/moshi/moshi/utils/compile.py b/moshi/moshi/utils/compile.py
index b47e987..d101c86 100644
--- a/moshi/moshi/utils/compile.py
+++ b/moshi/moshi/utils/compile.py
@@ -194,11 +194,14 @@ class CUDAGraphed:
             be top level args, not nested in structures (tuples, dicts, etc). Keyword
             arguments are NOT supported for simplicity.
         warmup_steps: how many call to make normally before CUDA Graphing. In particular, this
-            allows torch.compiled functions to get properly compiled."""
+            allows torch.compiled functions to get properly compiled.
+        disable: if True, just call the func directly, useful to quickly deactivate on CPU.
+        """
 
-    def __init__(self, func: tp.Callable, warmup_steps: int = 1):
+    def __init__(self, func: tp.Callable, warmup_steps: int = 1, disable: bool = False):
         self.func = func
         self.warmup_steps = warmup_steps
+        self.disable = disable
         self._graph: cuda.CUDAGraph | None = None
         self._output: tuple | None = None
         self._args: tuple | None = None
@@ -214,7 +217,7 @@ def reset(self, warmup_steps: int = 0) -> None:
 
     def __call__(self, *args, **kwargs) -> tp.Any:
         if kwargs:
             raise RuntimeError("Named arguments not supported for now.")
-        if not _is_cuda_graph_enabled() or in_cuda_graph():
+        if self.disable or not _is_cuda_graph_enabled() or in_cuda_graph():
             return self.func(*args, **kwargs)
 
         def _clone_tensors(args: tuple) -> tuple:
diff --git a/moshi/pyproject.toml b/moshi/pyproject.toml
index 662cbea..d7044c3 100644
--- a/moshi/pyproject.toml
+++ b/moshi/pyproject.toml
@@ -18,6 +18,8 @@ maintainers = [{name="Laurent Mazaré", email="laurent@kyutai.org"}]
 license = {text = "MIT"}
 dynamic = ["version"]
 
+[tool.setuptools.dynamic]
+version = {attr = "moshi.__version__"}
 
 [build-system]
 requires = ["setuptools"]
diff --git a/scripts/mimi_streaming_test.py b/scripts/mimi_streaming_test.py
index d71f07c..5f8c4da 100644
--- a/scripts/mimi_streaming_test.py
+++ b/scripts/mimi_streaming_test.py
@@ -3,20 +3,23 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
-import moshi
+import random
 import time
-import torch
+
+import numpy as np
 import sphn
+import torch
 from torch.profiler import profile, ProfilerActivity
-import numpy as np
-import random
 
-SAMPLE_RATE = moshi.models.moshi.SAMPLE_RATE
-DEVICE = "cuda:0"
-ENABLE_PROFILING = False
+from moshi.models import loaders
+
 
 parser = argparse.ArgumentParser()
-parser.add_argument("--weights", type=str)
+parser.add_argument("--weights", type=str, default=loaders.MIMI_V0_1)
+parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO)
+parser.add_argument("--device", type=str,
+                    default='cuda' if torch.cuda.device_count() else 'cpu')
+parser.add_argument("--profile", action='store_true')
 args = parser.parse_args()
@@ -35,23 +38,26 @@ def seed_all(seed):
 
 
 print("loading mimi")
-ec = moshi.models.moshi.get_encodec(args.weights, DEVICE)
+mimi = loaders.get_mimi(
+    loaders.resolve_model_checkpoint(args.weights, args.hf_repo),
+    args.device)
 print("mimi loaded")
 
 
-def encodec_streaming_test(ec, pcm_chunk_size=1920, max_duration_sec=10.0):
+def mimi_streaming_test(mimi, pcm_chunk_size=1920, max_duration_sec=10.0):
     # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
     sample_pcm, sample_sr = sphn.read("bria.mp3")
+    sample_rate = mimi.sample_rate
     print("loaded pcm", sample_pcm.shape, sample_sr)
     sample_pcm = sphn.resample(
-        sample_pcm, src_sample_rate=sample_sr, dst_sample_rate=SAMPLE_RATE
+        sample_pcm, src_sample_rate=sample_sr, dst_sample_rate=sample_rate
     )
-    sample_pcm = torch.tensor(sample_pcm, device=DEVICE)
-    max_duration_len = int(SAMPLE_RATE * max_duration_sec)
+    sample_pcm = torch.tensor(sample_pcm, device=args.device)
+    max_duration_len = int(sample_rate * max_duration_sec)
     if sample_pcm.shape[-1] > max_duration_len:
         sample_pcm = sample_pcm[..., :max_duration_len]
     print("resampled pcm", sample_pcm.shape, sample_sr)
-    sample_pcm = sample_pcm[None].to(device=DEVICE)
+    sample_pcm = sample_pcm[None].to(device=args.device)
 
     print("streaming encoding...")
     start_time = time.time()
@@ -61,34 +67,34 @@ def run_loop():
         for start_idx in range(0, sample_pcm.shape[-1], pcm_chunk_size):
             end_idx = min(sample_pcm.shape[-1],
                           start_idx + pcm_chunk_size)
             chunk = sample_pcm[..., start_idx:end_idx]
-            codes, _scale = ec.encode(chunk)
+            codes = mimi.encode(chunk)
             if codes.shape[-1]:
                 print(start_idx, codes.shape, end="\r")
                 all_codes.append(codes)
 
-    if ENABLE_PROFILING:
+    if args.profile:
         with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
             run_loop()
         prof.export_chrome_trace("trace.json")
     else:
         run_loop()
-    all_codes = torch.cat(all_codes, dim=-1)
-    print(f"codes {all_codes.shape} generated in {time.time() - start_time:.2f}s")
+    all_codes_th = torch.cat(all_codes, dim=-1)
+    print(f"codes {all_codes_th.shape} generated in {time.time() - start_time:.2f}s")
 
     print("streaming decoding...")
     all_pcms = []
-    with ec.streaming():
-        for i in range(all_codes.shape[-1]):
-            codes = all_codes[..., i : i + 1]
-            pcm = ec.decode(codes, scale=None)
+    with mimi.streaming(1):
+        for i in range(all_codes_th.shape[-1]):
+            codes = all_codes_th[..., i : i + 1]
+            pcm = mimi.decode(codes)
             print(i, pcm.shape, end="\r")
             all_pcms.append(pcm)
     all_pcms = torch.cat(all_pcms, dim=-1)
     print("pcm", all_pcms.shape, all_pcms.dtype)
-    sphn.write_wav("streaming_out.wav", all_pcms[0, 0].cpu().numpy(), SAMPLE_RATE)
-    pcm = ec.decode(all_codes, scale=None)
+    sphn.write_wav("streaming_out.wav", all_pcms[0, 0].cpu().numpy(), sample_rate)
+    pcm = mimi.decode(all_codes_th)
     print("pcm", pcm.shape, pcm.dtype)
-    sphn.write_wav.write_wav("roundtrip_out.wav", pcm[0, 0].cpu().numpy(), SAMPLE_RATE)
+    sphn.write_wav("roundtrip_out.wav", pcm[0, 0].cpu().numpy(), sample_rate)
 
 
 with torch.no_grad():
-    encodec_streaming_test(ec)
+    mimi_streaming_test(mimi)
diff --git a/scripts/setup.cfg b/scripts/setup.cfg
index dc7aa4b..5bccac4 100755
--- a/scripts/setup.cfg
+++ b/scripts/setup.cfg
@@ -3,3 +3,4 @@ max-line-length = 120
 
 [flake8]
 max-line-length = 120
+ignore = E203,E704
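
Note on the change above: `CUDAGraphed(func, disable=...)` now falls back to calling
`func` eagerly whenever `disable` is set, which is how the models turn CUDA graphing
off on CPU-only hosts. A minimal usage sketch, assuming only the `CUDAGraphed` API
from this patch (the `step` function and tensor shape are made up for illustration):

    import torch

    from moshi.utils.compile import CUDAGraphed


    def step(x: torch.Tensor) -> torch.Tensor:
        # Stand-in for a per-frame transformer call.
        return torch.relu(x) + 1.0


    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Same pattern as _init_streaming_state: graph on CUDA, run eagerly elsewhere.
    graphed_step = CUDAGraphed(step, disable=(device != 'cuda'))
    y = graphed_step(torch.zeros(1, 8, device=device))

Arguments must be passed positionally and as top-level tensors, as the docstring in
the diff requires; keyword arguments raise a RuntimeError.
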
From 73d7249471ec17dd40244f778b0cf90a595cf3fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 17:06:28 +0200
Subject: [PATCH 3/9] fixing mimi test

---
 moshi/moshi/models/compression.py  |  4 ++--
 moshi/moshi/modules/transformer.py | 17 +++++++++++------
 moshi/moshi/utils/compile.py       |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/moshi/moshi/models/compression.py b/moshi/moshi/models/compression.py
index 9c271ca..7a790d5 100644
--- a/moshi/moshi/models/compression.py
+++ b/moshi/moshi/models/compression.py
@@ -225,9 +225,9 @@ def _init_streaming_state(self, batch_size: int) -> _MimiState:
         graphed_tr_dec = None
         graphed_tr_enc = None
         if self.encoder_transformer is not None:
-            graphed_tr_enc = CUDAGraphed(self.encoder_transformerd, disable=disable)
+            graphed_tr_enc = CUDAGraphed(self.encoder_transformer, disable=disable)
         if self.decoder_transformer is not None:
-            graphed_tr_dec = CUDAGraphed(self.decoder_transformerd, disable=disable)
+            graphed_tr_dec = CUDAGraphed(self.decoder_transformer, disable=disable)
         return _MimiState(graphed_tr_enc, graphed_tr_dec)
 
     @property
diff --git a/moshi/moshi/modules/transformer.py b/moshi/moshi/modules/transformer.py
index 212d721..84d1952 100644
--- a/moshi/moshi/modules/transformer.py
+++ b/moshi/moshi/modules/transformer.py
@@ -9,6 +9,7 @@
 See `StreamingTransformer` for more information.
 """
 
+from contextlib import ExitStack
 from dataclasses import dataclass
 import typing as tp
 
@@ -17,6 +18,7 @@
 import torch.nn as nn
 from torch.nn import functional as F
 
+from ..utils.compile import no_compile
 from .gating import make_gating
 from .rope import RotaryEmbedding
 from .streaming import StreamingModule, StreamingContainer
@@ -579,12 +581,15 @@ def _sa_block(self, x: torch.Tensor):
         return x_orig + self.layer_scale_1(update)
 
     def forward(self, x: torch.Tensor):
-        x = self._sa_block(x)
-        x = self._ff_block(x)
-        state = self._streaming_state
-        if state:
-            state.offset_cpu += x.shape[1]
-        return x
+        with ExitStack() as stack:
+            if x.device.type != 'cuda':
+                stack.enter_context(no_compile())
+            x = self._sa_block(x)
+            x = self._ff_block(x)
+            state = self._streaming_state
+            if state:
+                state.offset_cpu += x.shape[1]
+            return x
 
 
 @dataclass
diff --git a/moshi/moshi/utils/compile.py b/moshi/moshi/utils/compile.py
index d101c86..780513b 100644
--- a/moshi/moshi/utils/compile.py
+++ b/moshi/moshi/utils/compile.py
@@ -23,7 +23,7 @@
 
 @contextmanager
 def no_compile():
-    """Disable torch.compile locally."""
+    """Disable torch.compile locally. PyTorch 2.4 now provides a function to do that."""
     global _compile_disabled
 
     prev_disabled = _compile_disabled

From 811c885c644e5f5aea0444ae148c32dd37996ffd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 17:22:41 +0200
Subject: [PATCH 4/9] fix

---
 moshi/moshi/models/loaders.py  |  2 +-
 moshi/moshi/server.py          | 12 ++--
 moshi/pyproject.toml           |  2 +-
 moshi/requirements.txt         |  1 +
 scripts/mimi_streaming_test.py |  3 +-
 scripts/moshi_benchmark.py     | 87 ++++++++++++++++++----------------
 6 files changed, 57 insertions(+), 50 deletions(-)

diff --git a/moshi/moshi/models/loaders.py b/moshi/moshi/models/loaders.py
index af0ca7a..1917694 100644
--- a/moshi/moshi/models/loaders.py
+++ b/moshi/moshi/models/loaders.py
@@ -104,7 +104,7 @@ def _is_safetensors(path: Path | str) -> bool:
     return Path(path).suffix in (".safetensors", ".sft", ".sfts")
 
 
-def resolve_model_checkpoint(name: str, hf_repo: str = HF_REPO, allow_local_file: bool = False) -> Path:
+def resolve_model_checkpoint(name: str, hf_repo: str = HF_REPO, allow_local_file: bool = True) -> Path:
     """Load a model checkpoint from HF.
     If `allow_local_file` is True, then if a file `name` exists, it will be used instead.
     """
diff --git a/moshi/moshi/server.py b/moshi/moshi/server.py
index e610061..a67a332 100644
--- a/moshi/moshi/server.py
+++ b/moshi/moshi/server.py
@@ -175,11 +175,11 @@ def main():
     parser.add_argument("--gradio_tunnel_token",
                         help='Provide a custom (secret) token here to keep getting the same URL.')
 
-    parser.add_argument("--tokenizer-name", type=str, default=loaders.TEXT_TOKENIZER_V0_1,
+    parser.add_argument("--tokenizer", type=str, default=loaders.TEXT_TOKENIZER_V0_1,
                         help="Name of the text tokenizer file in the given HF repo, or path to a local file.")
-    parser.add_argument("--moshi-name", type=str, default=loaders.MOSHIKO_V0_1,
+    parser.add_argument("--moshi-weight", type=str, default=loaders.MOSHIKO_V0_1,
                         help="Name of the Moshi checkpoint in the given HF repo, or path to a local file.")
-    parser.add_argument("--mimi-name", type=str, default=loaders.MIMI_V0_1,
+    parser.add_argument("--mimi-weight", type=str, default=loaders.MIMI_V0_1,
                         help="Name of the Mimi checkpoint in the given HF repo, or path to a local file.")
     parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO,
                         help="HF repo to look into, defaults to Kyutai official one.")
@@ -204,15 +204,15 @@ def main():
     tunnel_token = args.gradio_tunnel_token
 
     log("info", "loading mimi")
-    mimi_path = loaders.resolve_model_checkpoint(args.mimi_name, args.hf_repo, allow_local_file=True)
+    mimi_path = loaders.resolve_model_checkpoint(args.mimi_weight, args.hf_repo)
     mimi = loaders.get_mimi(mimi_path, args.device)
     log("info", "mimi loaded")
 
-    tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer_name, args.hf_repo, allow_local_file=True)
+    tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer, args.hf_repo)
     text_tokenizer = loaders.get_text_tokenizer(tokenizer_path)
 
     log("info", "loading moshi")
-    moshi_path = loaders.resolve_model_checkpoint(args.moshi_name, args.hf_repo, allow_local_file=True)
+    moshi_path = loaders.resolve_model_checkpoint(args.moshi_weight, args.hf_repo)
     lm = loaders.get_moshi_lm(moshi_path, args.device)
     log("info", "moshi loaded")
diff --git a/moshi/pyproject.toml b/moshi/pyproject.toml
index d7044c3..d5b7578 100644
--- a/moshi/pyproject.toml
+++ b/moshi/pyproject.toml
@@ -3,7 +3,7 @@ name = "moshi"
 requires-python = ">= 3.10"
 description = "Moshi is moshi"
 dependencies = [
-    "numpy >= 2.1.0, < 2.2",
+    "numpy >= 1.26, < 2.2",
     "safetensors >= 0.4.0, < 0.5",
     "huggingface-hub >= 0.24, < 0.25",
     "einops == 0.7",
diff --git a/moshi/requirements.txt b/moshi/requirements.txt
index 9a93905..876de9d 100644
--- a/moshi/requirements.txt
+++ b/moshi/requirements.txt
@@ -5,5 +5,6 @@ sounddevice==0.5.0
 soundfile==0.12.1
 sphn==0.1.4
 torch==2.2.0
+numpy==1.26.4
 aiohttp>=3.10.5, <3.11
 huggingface-hub==0.24.6
diff --git a/scripts/mimi_streaming_test.py b/scripts/mimi_streaming_test.py
index 5f8c4da..54865a3 100644
--- a/scripts/mimi_streaming_test.py
+++ b/scripts/mimi_streaming_test.py
@@ -44,7 +44,8 @@ def seed_all(seed):
 print("mimi loaded")
 
 
-def mimi_streaming_test(mimi, pcm_chunk_size=1920, max_duration_sec=10.0):
+def mimi_streaming_test(mimi, max_duration_sec=10.0):
+    pcm_chunk_size = int(mimi.sample_rate / mimi.frame_rate)
     # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
     sample_pcm, sample_sr = sphn.read("bria.mp3")
     sample_rate = mimi.sample_rate
diff --git a/scripts/moshi_benchmark.py b/scripts/moshi_benchmark.py
index 056542f..0bd015b 100644
--- a/scripts/moshi_benchmark.py
+++ b/scripts/moshi_benchmark.py
@@ -3,26 +3,30 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
-import moshi
-import sentencepiece
-import torch
-import sphn
-import numpy as np
 import random
 import time
 
+import numpy as np
+import sentencepiece
+import sphn
+import torch
 from torch.profiler import profile, ProfilerActivity
 
-SAMPLE_RATE = moshi.models.moshi.SAMPLE_RATE
-DEVICE = "cuda:0"
-ENABLE_PROFILING = False
+from moshi.models import loaders, LMGen
+
 
 parser = argparse.ArgumentParser()
-parser.add_argument("--tokenizer", type=str)
-parser.add_argument("--moshi-weights", type=str)
-parser.add_argument("--mimi-weights", type=str)
+parser.add_argument("--tokenizer", type=str, default=loaders.TEXT_TOKENIZER_V0_1,
+                    help="Name of the text tokenizer file in the given HF repo, or path to a local file.")
+parser.add_argument("--moshi-weight", type=str, default=loaders.MOSHIKO_V0_1,
+                    help="Name of the Moshi checkpoint in the given HF repo, or path to a local file.")
+parser.add_argument("--mimi-weight", type=str, default=loaders.MIMI_V0_1,
+                    help="Name of the Mimi checkpoint in the given HF repo, or path to a local file.")
+parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO,
+                    help="HF repo to look into, defaults to Kyutai official one.")
 parser.add_argument("--steps", default=100, type=int)
 parser.add_argument("--profile", action="store_true")
+parser.add_argument("--device", type=str, default='cuda')
 args = parser.parse_args()
@@ -39,52 +43,53 @@ def seed_all(seed):
 
 seed_all(42424242)
 
+tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer, args.hf_repo)
+text_tokenizer = loaders.get_text_tokenizer(tokenizer_path)
 
 print("loading mimi")
-ec = moshi.models.moshi.get_encodec(args.mimi_weights, DEVICE)
+mimi_path = loaders.resolve_model_checkpoint(args.mimi_weight, args.hf_repo)
+mimi = loaders.get_mimi(mimi_path, args.device)
 print("mimi loaded")
 
-text_tokenizer = sentencepiece.SentencePieceProcessor(args.tokenizer)
-
 print("loading moshi")
-lm = moshi.models.moshi.get_lm(args.moshi_weights, DEVICE)
-lm.to(torch.bfloat16)
+moshi_path = loaders.resolve_model_checkpoint(args.moshi_weight, args.hf_repo)
+lm = loaders.get_moshi_lm(moshi_path, args.device)
+lm_gen = LMGen(lm)
 print("lm loaded")
 
-lm_gen = moshi.models.LMGen(lm)
-
 
 def cb(step, total):
     print(f"{step:06d} / {total:06d}", end="\r")
 
 
 def streaming_test(bs):
-    main_audio = []
     main_text = []
+    main_audio = []
+
+    frame_size = int(mimi.sample_rate / mimi.frame_rate)
 
     def run_step():
         start_time = time.time()
         # Chunk should contain the pcm data from the user, single channel with a sample rate of 24000.
-        chunk = torch.zeros((bs, 1, 1920), dtype=torch.float, device=DEVICE)
-        codes = ec.encode(chunk)
+        chunk = torch.zeros((bs, 1, frame_size), dtype=torch.float, device=args.device)
+        codes = mimi.encode(chunk)
         assert codes.shape[-1] == 1
-        for c in range(codes.shape[-1]):
-            be = time.time()
-            ev = torch.cuda.Event(enable_timing=True)
-            ev.record()
-            tokens = lm_gen.step(codes[:, :, c : c + 1])
-            if tokens is None:
-                print("Skipping")
-                return
-            evb = torch.cuda.Event(enable_timing=True)
-            evb.record()
-            dt_step = time.time() - be
-            text_tokens = tokens[:, 0, 0]
-            audio_tokens = tokens[:, 1:, :]
-            main_pcm = ec.decode(audio_tokens)
-            # main_pcm is the audio to be played back to the user, here we just append it and store it in
-            # a file once the loop is finished.
-            main_audio.append(main_pcm[0])
+        be = time.time()
+        ev = torch.cuda.Event(enable_timing=True)
+        ev.record()
+        tokens = lm_gen.step(codes[:, :, :1])
+        if tokens is None:
+            print("Skipping")
+            return
+        evb = torch.cuda.Event(enable_timing=True)
+        evb.record()
+        dt_step = time.time() - be
+        text_tokens = tokens[:, 0, 0]
+        audio_tokens = tokens[:, 1:, :]
+        main_pcm = mimi.decode(audio_tokens)
+        # main_pcm is the audio to be played back to the user, here we just append it and store it in
+        # a file once the loop is finished.
+        main_audio.append(main_pcm[0])
         evb.synchronize()
         dg = ev.elapsed_time(evb)
         torch.cuda.synchronize()
@@ -109,17 +114,17 @@ def run_step():
             run_step()
         print()
         prof.export_chrome_trace("trace.json")
-    main_audio = torch.cat(main_audio, dim=-1)
-    print(main_audio.shape)
+    main_audio_th = torch.cat(main_audio, dim=-1)
+    print(main_audio_th.shape)
     print("generated text:")
     print("".join(main_text))
     sphn.write_wav(
-        "gen_main.wav", main_audio[0].cpu().numpy().astype(np.float32), SAMPLE_RATE
+        "gen_main.wav", main_audio_th[0].cpu().numpy().astype(np.float32), mimi.sample_rate
     )
 
 
 print("streaming test")
 bs = 1
 with torch.no_grad():
-    with ec.streaming(bs), lm_gen.streaming(bs):
+    with mimi.streaming(bs), lm_gen.streaming(bs):
         streaming_test(bs)
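
For context on the loader changes in PATCH 4/9 above: with `allow_local_file` now
defaulting to True, a name that matches an existing local file is used as-is, and
otherwise the checkpoint is fetched from the HF repo. A small sketch of the resulting
loading flow, using only names that appear in the diffs (the device selection line is
illustrative):

    import torch

    from moshi.models import loaders

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Resolve against the Kyutai repo, or a local file if one exists under that name.
    mimi_path = loaders.resolve_model_checkpoint(loaders.MIMI_V0_1, loaders.HF_REPO)
    mimi = loaders.get_mimi(mimi_path, device)

    tokenizer_path = loaders.resolve_model_checkpoint(loaders.TEXT_TOKENIZER_V0_1, loaders.HF_REPO)
    text_tokenizer = loaders.get_text_tokenizer(tokenizer_path)
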
From 5b4ebe107a651256033008769b1cb29948fce266 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 17:53:39 +0100
Subject: [PATCH 5/9] chore: update config

---
 rust/moshi-backend/config-q8.json | 2 +-
 rust/moshi-backend/config.json    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rust/moshi-backend/config-q8.json b/rust/moshi-backend/config-q8.json
index 1f0ad96..4e45147 100644
--- a/rust/moshi-backend/config-q8.json
+++ b/rust/moshi-backend/config-q8.json
@@ -1,6 +1,6 @@
 {
   "instance_name": "foo",
-  "hf_repo": "kmhf/msh-v0.1",
+  "hf_repo": "kmhf/moshi-v0.1",
   "lm_model_file": "$HOME/tmp/moshiko_rs_301e30bf@120.q8.gguf",
   "text_tokenizer_file": "$HOME/tmp/tokenizer_spm_32k_3.model",
   "log_dir": "$HOME/tmp/moshi-logs",
diff --git a/rust/moshi-backend/config.json b/rust/moshi-backend/config.json
index 3a7fa9d..108b2c2 100644
--- a/rust/moshi-backend/config.json
+++ b/rust/moshi-backend/config.json
@@ -1,6 +1,6 @@
 {
   "instance_name": "foo",
-  "hf_repo": "kmhf/msh-v0.1",
+  "hf_repo": "kmhf/moshi-v0.1",
   "lm_model_file": "$HOME/tmp/moshiko_rs_301e30bf@120.safetensors",
   "text_tokenizer_file": "$HOME/tmp/tokenizer_spm_32k_3.model",
   "log_dir": "$HOME/tmp/moshi-logs",

From ea5228caf7c36653a665ac14b682d8aa0460b65c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 19:10:15 +0200
Subject: [PATCH 6/9] fix

---
 .github/workflows/precommit.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml
index 643e3bc..51ca7ba 100644
--- a/.github/workflows/precommit.yml
+++ b/.github/workflows/precommit.yml
@@ -1,4 +1,4 @@
-name: precommmit
+name: precommit
 on:
   push:
     branches: [ main ]
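
Looking back at the transformer change in PATCH 3/9: the `ExitStack` enters
`no_compile()` only for non-CUDA inputs, so compiled kernels stay in use on GPU while
CPU inference skips torch.compile entirely. A rough standalone equivalent of that
pattern, assuming only `no_compile` from the patch (the `layer` argument is a
placeholder for any module or callable):

    from contextlib import ExitStack

    import torch

    from moshi.utils.compile import no_compile


    def forward_maybe_uncompiled(layer, x: torch.Tensor) -> torch.Tensor:
        with ExitStack() as stack:
            if x.device.type != 'cuda':
                # Skip torch.compile off-GPU, mirroring StreamingTransformerLayer.forward.
                stack.enter_context(no_compile())
            return layer(x)
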
From 91dc799ef3560cba3525118244581509e1719ca0 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 21:23:32 +0100
Subject: [PATCH 7/9] chore: auto-open browser

---
 moshi_mlx/moshi_mlx/local_web.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/moshi_mlx/moshi_mlx/local_web.py b/moshi_mlx/moshi_mlx/local_web.py
index 2e22aba..b63a9e6 100644
--- a/moshi_mlx/moshi_mlx/local_web.py
+++ b/moshi_mlx/moshi_mlx/local_web.py
@@ -17,6 +17,7 @@
 import sphn
 import aiohttp
 from aiohttp import web
+import webbrowser
 
 import mlx.core as mx
 import mlx.nn as nn
@@ -334,6 +335,10 @@ async def handle_root(_):
     runner = web.AppRunner(app)
     await runner.setup()
     site = web.TCPSite(runner, args.host, args.port)
+
+    log("info", f"opening browser at http://{args.host}:{args.port}")
+    webbrowser.open(f"http://{args.host}:{args.port}")
+
     await asyncio.gather(
         recv_loop(), send_loop(), recv_loop2(), send_loop2(), site.start()
     )

From e64bd8ad339a6c13966e891a0fc40111e80a5375 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 21:34:40 +0100
Subject: [PATCH 8/9] chore: add --no-browser flag

---
 moshi_mlx/moshi_mlx/local_web.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/moshi_mlx/moshi_mlx/local_web.py b/moshi_mlx/moshi_mlx/local_web.py
index b63a9e6..07b785b 100644
--- a/moshi_mlx/moshi_mlx/local_web.py
+++ b/moshi_mlx/moshi_mlx/local_web.py
@@ -336,8 +336,9 @@ async def handle_root(_):
     await runner.setup()
     site = web.TCPSite(runner, args.host, args.port)
 
-    log("info", f"opening browser at http://{args.host}:{args.port}")
-    webbrowser.open(f"http://{args.host}:{args.port}")
+    if not args.no_browser:
+        log("info", f"opening browser at http://{args.host}:{args.port}")
+        webbrowser.open(f"http://{args.host}:{args.port}")
 
     await asyncio.gather(
         recv_loop(), send_loop(), recv_loop2(), send_loop2(), site.start()
@@ -361,6 +362,7 @@ def main():
     parser.add_argument("--static", type=str)
     parser.add_argument("--host", default="localhost", type=str)
    parser.add_argument("--port", default=8998, type=int)
+    parser.add_argument("--no-browser", action="store_true")
 
     args = parser.parse_args()

From e0b35345da495913170f43807b66e0177f43e85f Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 21:59:31 +0100
Subject: [PATCH 9/9] chore: favicon

---
 client/index.html                      |   2 ++
 client/public/assets/favicon-16x16.png | Bin 0 -> 1166 bytes
 client/public/assets/favicon-32x32.png | Bin 0 -> 1485 bytes
 client/public/assets/favicon.ico       | Bin 0 -> 7406 bytes
 4 files changed, 2 insertions(+)
 create mode 100644 client/public/assets/favicon-16x16.png
 create mode 100644 client/public/assets/favicon-32x32.png
 create mode 100644 client/public/assets/favicon.ico

diff --git a/client/index.html b/client/index.html
index 3a42de7..7ca0634 100644
--- a/client/index.html
+++ b/client/index.html
@@ -3,6 +3,8 @@
+    <link rel="icon" type="image/png" sizes="32x32" href="/assets/favicon-32x32.png" />
+    <link rel="icon" type="image/png" sizes="16x16" href="/assets/favicon-16x16.png" />
     <title>moshi.chat</title>
diff --git a/client/public/assets/favicon-16x16.png b/client/public/assets/favicon-16x16.png
new file mode 100644
index 0000000000000000000000000000000000000000..79151b3f11ffc7c5ece53dc6bff28b05dcb223f9
GIT binary patch
literal 1166
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001

diff --git a/client/public/assets/favicon-32x32.png b/client/public/assets/favicon-32x32.png
new file mode 100644
index 0000000000000000000000000000000000000000..45770ad6648e43348a849405a0052e63cd635cc1
GIT binary patch
literal 1485
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001

diff --git a/client/public/assets/favicon.ico b/client/public/assets/favicon.ico
new file mode 100644
GIT binary patch
literal 7406
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001