From cf30ffb6609ddfe9dd50d3ef07eece355cca93c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 16:34:01 +0200
Subject: [PATCH 1/9] make pyproject find packages automatically

---
 moshi/pyproject.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/moshi/pyproject.toml b/moshi/pyproject.toml
index 4b77729..662cbea 100644
--- a/moshi/pyproject.toml
+++ b/moshi/pyproject.toml
@@ -23,10 +23,6 @@ dynamic = ["version"]
 requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 
-[tool.setuptools]
-packages = ["moshi", "moshi.utils", "moshi.modules", "moshi.models", "moshi.quantization"]
-
-
 [project.optional-dependencies]
 dev = [
     "pyright",

From 0fb83ff3277c708a7cedb955e41ba6a865dd9e22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 16:59:08 +0200
Subject: [PATCH 2/9] disable CUDA graphs when not running on CUDA

---
 moshi/moshi/models/compression.py |  6 ++--
 moshi/moshi/models/lm.py          |  5 +--
 moshi/moshi/utils/compile.py      |  9 +++--
 moshi/pyproject.toml              |  2 ++
 scripts/mimi_streaming_test.py    | 58 +++++++++++++++++--------------
 scripts/setup.cfg                 |  1 +
 6 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/moshi/moshi/models/compression.py b/moshi/moshi/models/compression.py
index 9cf9996..9c271ca 100644
--- a/moshi/moshi/models/compression.py
+++ b/moshi/moshi/models/compression.py
@@ -220,12 +220,14 @@ def __init__(
         )
 
     def _init_streaming_state(self, batch_size: int) -> _MimiState:
+        device = next(self.parameters()).device
+        disable = device.type != 'cuda'
         graphed_tr_dec = None
         graphed_tr_enc = None
         if self.encoder_transformer is not None:
-            graphed_tr_enc = CUDAGraphed(self.encoder_transformer)
+            graphed_tr_enc = CUDAGraphed(self.encoder_transformerd, disable=disable)
         if self.decoder_transformer is not None:
-            graphed_tr_dec = CUDAGraphed(self.decoder_transformer)
+            graphed_tr_dec = CUDAGraphed(self.decoder_transformerd, disable=disable)
         return _MimiState(graphed_tr_enc, graphed_tr_dec)
 
     @property
diff --git a/moshi/moshi/models/lm.py b/moshi/moshi/models/lm.py
index 58ad922..8cca181 100644
--- a/moshi/moshi/models/lm.py
+++ b/moshi/moshi/models/lm.py
@@ -372,8 +372,9 @@ def _init_streaming_state(self, batch_size: int) -> _LMGenState:
             dtype=torch.long,
         )
 
-        graphed_main = CUDAGraphed(lm_model.forward_text)
-        graphed_depth = CUDAGraphed(self.depformer_step)
+        disable = lm_model.device.type != 'cuda'
+        graphed_main = CUDAGraphed(lm_model.forward_text, disable=disable)
+        graphed_depth = CUDAGraphed(self.depformer_step, disable=disable)
 
         return _LMGenState(cache, initial, graphed_main, graphed_depth)
diff --git a/moshi/moshi/utils/compile.py b/moshi/moshi/utils/compile.py
index b47e987..d101c86 100644
--- a/moshi/moshi/utils/compile.py
+++ b/moshi/moshi/utils/compile.py
@@ -194,11 +194,14 @@ class CUDAGraphed:
             be top level args, not nested in structures (tuples, dicts, etc). Keyword
             arguments are NOT supported for simplicity.
         warmup_steps: how many call to make normally before CUDA Graphing. In particular, this
-            allows torch.compiled functions to get properly compiled."""
+            allows torch.compiled functions to get properly compiled.
+        disable: if True, just call the func directly, useful to quickly deactivate on CPU.
+        """
 
-    def __init__(self, func: tp.Callable, warmup_steps: int = 1):
+    def __init__(self, func: tp.Callable, warmup_steps: int = 1, disable: bool = False):
         self.func = func
         self.warmup_steps = warmup_steps
+        self.disable = disable
         self._graph: cuda.CUDAGraph | None = None
         self._output: tuple | None = None
         self._args: tuple | None = None
@@ -214,7 +217,7 @@ def reset(self, warmup_steps: int = 0) -> None:
 
     def __call__(self, *args, **kwargs) -> tp.Any:
         if kwargs:
             raise RuntimeError("Named arguments not supported for now.")
-        if not _is_cuda_graph_enabled() or in_cuda_graph():
+        if self.disable or not _is_cuda_graph_enabled() or in_cuda_graph():
             return self.func(*args, **kwargs)
 
         def _clone_tensors(args: tuple) -> tuple:
diff --git a/moshi/pyproject.toml b/moshi/pyproject.toml
index 662cbea..d7044c3 100644
--- a/moshi/pyproject.toml
+++ b/moshi/pyproject.toml
@@ -18,6 +18,8 @@ maintainers = [{name="Laurent Mazaré", email="laurent@kyutai.org"}]
 license = {text = "MIT"}
 dynamic = ["version"]
 
+[tool.setuptools.dynamic]
+version = {attr = "moshi.__version__"}
 
 [build-system]
 requires = ["setuptools"]
diff --git a/scripts/mimi_streaming_test.py b/scripts/mimi_streaming_test.py
index d71f07c..5f8c4da 100644
--- a/scripts/mimi_streaming_test.py
+++ b/scripts/mimi_streaming_test.py
@@ -3,20 +3,23 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
-import moshi
+import random
 import time
-import torch
+
+import numpy as np
 import sphn
+import torch
 from torch.profiler import profile, ProfilerActivity
-import numpy as np
-import random
 
-SAMPLE_RATE = moshi.models.moshi.SAMPLE_RATE
-DEVICE = "cuda:0"
-ENABLE_PROFILING = False
+from moshi.models import loaders
+
 
 parser = argparse.ArgumentParser()
-parser.add_argument("--weights", type=str)
+parser.add_argument("--weights", type=str, default=loaders.MIMI_V0_1)
+parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO)
+parser.add_argument("--device", type=str,
+                    default='cuda' if torch.cuda.device_count() else 'cpu')
+parser.add_argument("--profile", action='store_true')
 args = parser.parse_args()
@@ -35,23 +38,26 @@ def seed_all(seed):
 
 
 print("loading mimi")
-ec = moshi.models.moshi.get_encodec(args.weights, DEVICE)
+mimi = loaders.get_mimi(
+    loaders.resolve_model_checkpoint(args.weights, args.hf_repo),
+    args.device)
 print("mimi loaded")
 
 
-def encodec_streaming_test(ec, pcm_chunk_size=1920, max_duration_sec=10.0):
+def mimi_streaming_test(mimi, pcm_chunk_size=1920, max_duration_sec=10.0):
     # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
     sample_pcm, sample_sr = sphn.read("bria.mp3")
+    sample_rate = mimi.sample_rate
     print("loaded pcm", sample_pcm.shape, sample_sr)
     sample_pcm = sphn.resample(
-        sample_pcm, src_sample_rate=sample_sr, dst_sample_rate=SAMPLE_RATE
+        sample_pcm, src_sample_rate=sample_sr, dst_sample_rate=sample_rate
     )
-    sample_pcm = torch.tensor(sample_pcm, device=DEVICE)
-    max_duration_len = int(SAMPLE_RATE * max_duration_sec)
+    sample_pcm = torch.tensor(sample_pcm, device=args.device)
+    max_duration_len = int(sample_rate * max_duration_sec)
     if sample_pcm.shape[-1] > max_duration_len:
         sample_pcm = sample_pcm[..., :max_duration_len]
     print("resampled pcm", sample_pcm.shape, sample_sr)
-    sample_pcm = sample_pcm[None].to(device=DEVICE)
+    sample_pcm = sample_pcm[None].to(device=args.device)
 
     print("streaming encoding...")
     start_time = time.time()
@@ -61,34 +67,34 @@ def run_loop():
         for start_idx in range(0, sample_pcm.shape[-1], pcm_chunk_size):
             end_idx = min(sample_pcm.shape[-1],
                           start_idx + pcm_chunk_size)
             chunk = sample_pcm[..., start_idx:end_idx]
-            codes, _scale = ec.encode(chunk)
+            codes = mimi.encode(chunk)
             if codes.shape[-1]:
                 print(start_idx, codes.shape, end="\r")
                 all_codes.append(codes)
 
-    if ENABLE_PROFILING:
+    if args.profile:
         with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
             run_loop()
         prof.export_chrome_trace("trace.json")
     else:
         run_loop()
-    all_codes = torch.cat(all_codes, dim=-1)
-    print(f"codes {all_codes.shape} generated in {time.time() - start_time:.2f}s")
+    all_codes_th = torch.cat(all_codes, dim=-1)
+    print(f"codes {all_codes_th.shape} generated in {time.time() - start_time:.2f}s")
 
     print("streaming decoding...")
     all_pcms = []
-    with ec.streaming():
-        for i in range(all_codes.shape[-1]):
-            codes = all_codes[..., i : i + 1]
-            pcm = ec.decode(codes, scale=None)
+    with mimi.streaming(1):
+        for i in range(all_codes_th.shape[-1]):
+            codes = all_codes_th[..., i : i + 1]
+            pcm = mimi.decode(codes)
             print(i, pcm.shape, end="\r")
             all_pcms.append(pcm)
     all_pcms = torch.cat(all_pcms, dim=-1)
     print("pcm", all_pcms.shape, all_pcms.dtype)
-    sphn.write_wav("streaming_out.wav", all_pcms[0, 0].cpu().numpy(), SAMPLE_RATE)
-    pcm = ec.decode(all_codes, scale=None)
+    sphn.write_wav("streaming_out.wav", all_pcms[0, 0].cpu().numpy(), sample_rate)
+    pcm = mimi.decode(all_codes_th)
     print("pcm", pcm.shape, pcm.dtype)
-    sphn.write_wav.write_wav("roundtrip_out.wav", pcm[0, 0].cpu().numpy(), SAMPLE_RATE)
+    sphn.write_wav("roundtrip_out.wav", pcm[0, 0].cpu().numpy(), sample_rate)
 
 
 with torch.no_grad():
-    encodec_streaming_test(ec)
+    mimi_streaming_test(mimi)
diff --git a/scripts/setup.cfg b/scripts/setup.cfg
index dc7aa4b..5bccac4 100755
--- a/scripts/setup.cfg
+++ b/scripts/setup.cfg
@@ -3,3 +3,4 @@ max-line-length = 120
 
 [flake8]
 max-line-length = 120
+ignore = E203,E704
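
Note on the change above: `CUDAGraphed(func, disable=...)` now falls back to calling
`func` eagerly whenever `disable` is set, which is how the models turn CUDA graphing
off on CPU-only hosts. A minimal usage sketch, assuming only the `CUDAGraphed` API
from this patch (the `step` function and tensor shape are made up for illustration):

    import torch

    from moshi.utils.compile import CUDAGraphed


    def step(x: torch.Tensor) -> torch.Tensor:
        # Stand-in for a per-frame transformer call.
        return torch.relu(x) + 1.0


    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Same pattern as _init_streaming_state: graph on CUDA, run eagerly elsewhere.
    graphed_step = CUDAGraphed(step, disable=(device != 'cuda'))
    y = graphed_step(torch.zeros(1, 8, device=device))

Arguments must be passed positionally and as top-level tensors, as the docstring in
the diff requires; keyword arguments raise a RuntimeError.
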
From 73d7249471ec17dd40244f778b0cf90a595cf3fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 17:06:28 +0200
Subject: [PATCH 3/9] fixing mimi test

---
 moshi/moshi/models/compression.py  |  4 ++--
 moshi/moshi/modules/transformer.py | 17 +++++++++++------
 moshi/moshi/utils/compile.py       |  2 +-
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/moshi/moshi/models/compression.py b/moshi/moshi/models/compression.py
index 9c271ca..7a790d5 100644
--- a/moshi/moshi/models/compression.py
+++ b/moshi/moshi/models/compression.py
@@ -225,9 +225,9 @@ def _init_streaming_state(self, batch_size: int) -> _MimiState:
         graphed_tr_dec = None
         graphed_tr_enc = None
         if self.encoder_transformer is not None:
-            graphed_tr_enc = CUDAGraphed(self.encoder_transformerd, disable=disable)
+            graphed_tr_enc = CUDAGraphed(self.encoder_transformer, disable=disable)
         if self.decoder_transformer is not None:
-            graphed_tr_dec = CUDAGraphed(self.decoder_transformerd, disable=disable)
+            graphed_tr_dec = CUDAGraphed(self.decoder_transformer, disable=disable)
         return _MimiState(graphed_tr_enc, graphed_tr_dec)
 
     @property
diff --git a/moshi/moshi/modules/transformer.py b/moshi/moshi/modules/transformer.py
index 212d721..84d1952 100644
--- a/moshi/moshi/modules/transformer.py
+++ b/moshi/moshi/modules/transformer.py
@@ -9,6 +9,7 @@
 See `StreamingTransformer` for more information.
 """
 
+from contextlib import ExitStack
 from dataclasses import dataclass
 import typing as tp
 
@@ -17,6 +18,7 @@
 import torch.nn as nn
 from torch.nn import functional as F
 
+from ..utils.compile import no_compile
 from .gating import make_gating
 from .rope import RotaryEmbedding
 from .streaming import StreamingModule, StreamingContainer
@@ -579,12 +581,15 @@ def _sa_block(self, x: torch.Tensor):
         return x_orig + self.layer_scale_1(update)
 
     def forward(self, x: torch.Tensor):
-        x = self._sa_block(x)
-        x = self._ff_block(x)
-        state = self._streaming_state
-        if state:
-            state.offset_cpu += x.shape[1]
-        return x
+        with ExitStack() as stack:
+            if x.device.type != 'cuda':
+                stack.enter_context(no_compile())
+            x = self._sa_block(x)
+            x = self._ff_block(x)
+            state = self._streaming_state
+            if state:
+                state.offset_cpu += x.shape[1]
+            return x
 
 
 @dataclass
diff --git a/moshi/moshi/utils/compile.py b/moshi/moshi/utils/compile.py
index d101c86..780513b 100644
--- a/moshi/moshi/utils/compile.py
+++ b/moshi/moshi/utils/compile.py
@@ -23,7 +23,7 @@
 
 @contextmanager
 def no_compile():
-    """Disable torch.compile locally."""
+    """Disable torch.compile locally. PyTorch 2.4 now provides a function to do that."""
     global _compile_disabled
 
     prev_disabled = _compile_disabled

From 811c885c644e5f5aea0444ae148c32dd37996ffd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 17:22:41 +0200
Subject: [PATCH 4/9] fix

---
 moshi/moshi/models/loaders.py  |  2 +-
 moshi/moshi/server.py          | 12 ++--
 moshi/pyproject.toml           |  2 +-
 moshi/requirements.txt         |  1 +
 scripts/mimi_streaming_test.py |  3 +-
 scripts/moshi_benchmark.py     | 87 ++++++++++++++++++----------------
 6 files changed, 57 insertions(+), 50 deletions(-)

diff --git a/moshi/moshi/models/loaders.py b/moshi/moshi/models/loaders.py
index af0ca7a..1917694 100644
--- a/moshi/moshi/models/loaders.py
+++ b/moshi/moshi/models/loaders.py
@@ -104,7 +104,7 @@ def _is_safetensors(path: Path | str) -> bool:
     return Path(path).suffix in (".safetensors", ".sft", ".sfts")
 
 
-def resolve_model_checkpoint(name: str, hf_repo: str = HF_REPO, allow_local_file: bool = False) -> Path:
+def resolve_model_checkpoint(name: str, hf_repo: str = HF_REPO, allow_local_file: bool = True) -> Path:
     """Load a model checkpoint from HF.
     If `allow_local_file` is True, then if a file `name` exists, it will be used instead.
     """
diff --git a/moshi/moshi/server.py b/moshi/moshi/server.py
index e610061..a67a332 100644
--- a/moshi/moshi/server.py
+++ b/moshi/moshi/server.py
@@ -175,11 +175,11 @@ def main():
     parser.add_argument("--gradio_tunnel_token",
                         help='Provide a custom (secret) token here to keep getting the same URL.')
 
-    parser.add_argument("--tokenizer-name", type=str, default=loaders.TEXT_TOKENIZER_V0_1,
+    parser.add_argument("--tokenizer", type=str, default=loaders.TEXT_TOKENIZER_V0_1,
                         help="Name of the text tokenizer file in the given HF repo, or path to a local file.")
-    parser.add_argument("--moshi-name", type=str, default=loaders.MOSHIKO_V0_1,
+    parser.add_argument("--moshi-weight", type=str, default=loaders.MOSHIKO_V0_1,
                         help="Name of the Moshi checkpoint in the given HF repo, or path to a local file.")
-    parser.add_argument("--mimi-name", type=str, default=loaders.MIMI_V0_1,
+    parser.add_argument("--mimi-weight", type=str, default=loaders.MIMI_V0_1,
                         help="Name of the Mimi checkpoint in the given HF repo, or path to a local file.")
     parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO,
                         help="HF repo to look into, defaults to Kyutai official one.")
@@ -204,15 +204,15 @@ def main():
     tunnel_token = args.gradio_tunnel_token
 
     log("info", "loading mimi")
-    mimi_path = loaders.resolve_model_checkpoint(args.mimi_name, args.hf_repo, allow_local_file=True)
+    mimi_path = loaders.resolve_model_checkpoint(args.mimi_weight, args.hf_repo)
     mimi = loaders.get_mimi(mimi_path, args.device)
     log("info", "mimi loaded")
 
-    tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer_name, args.hf_repo, allow_local_file=True)
+    tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer, args.hf_repo)
     text_tokenizer = loaders.get_text_tokenizer(tokenizer_path)
 
     log("info", "loading moshi")
-    moshi_path = loaders.resolve_model_checkpoint(args.moshi_name, args.hf_repo, allow_local_file=True)
+    moshi_path = loaders.resolve_model_checkpoint(args.moshi_weight, args.hf_repo)
     lm = loaders.get_moshi_lm(moshi_path, args.device)
     log("info", "moshi loaded")
diff --git a/moshi/pyproject.toml b/moshi/pyproject.toml
index d7044c3..d5b7578 100644
--- a/moshi/pyproject.toml
+++ b/moshi/pyproject.toml
@@ -3,7 +3,7 @@ name = "moshi"
 requires-python = ">= 3.10"
 description = "Moshi is moshi"
 dependencies = [
-    "numpy >= 2.1.0, < 2.2",
+    "numpy >= 1.26, < 2.2",
     "safetensors >= 0.4.0, < 0.5",
     "huggingface-hub >= 0.24, < 0.25",
     "einops == 0.7",
diff --git a/moshi/requirements.txt b/moshi/requirements.txt
index 9a93905..876de9d 100644
--- a/moshi/requirements.txt
+++ b/moshi/requirements.txt
@@ -5,5 +5,6 @@ sounddevice==0.5.0
 soundfile==0.12.1
 sphn==0.1.4
 torch==2.2.0
+numpy==1.26.4
 aiohttp>=3.10.5, <3.11
 huggingface-hub==0.24.6
diff --git a/scripts/mimi_streaming_test.py b/scripts/mimi_streaming_test.py
index 5f8c4da..54865a3 100644
--- a/scripts/mimi_streaming_test.py
+++ b/scripts/mimi_streaming_test.py
@@ -44,7 +44,8 @@ def seed_all(seed):
 print("mimi loaded")
 
 
-def mimi_streaming_test(mimi, pcm_chunk_size=1920, max_duration_sec=10.0):
+def mimi_streaming_test(mimi, max_duration_sec=10.0):
+    pcm_chunk_size = int(mimi.sample_rate / mimi.frame_rate)
     # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
     sample_pcm, sample_sr = sphn.read("bria.mp3")
     sample_rate = mimi.sample_rate
diff --git a/scripts/moshi_benchmark.py b/scripts/moshi_benchmark.py
index 056542f..0bd015b 100644
--- a/scripts/moshi_benchmark.py
+++ b/scripts/moshi_benchmark.py
@@ -3,26 +3,30 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
-import moshi
-import sentencepiece
-import torch
-import sphn
-import numpy as np
 import random
 import time
 
+import numpy as np
+import sentencepiece
+import sphn
+import torch
 from torch.profiler import profile, ProfilerActivity
 
-SAMPLE_RATE = moshi.models.moshi.SAMPLE_RATE
-DEVICE = "cuda:0"
-ENABLE_PROFILING = False
+from moshi.models import loaders, LMGen
+
 
 parser = argparse.ArgumentParser()
-parser.add_argument("--tokenizer", type=str)
-parser.add_argument("--moshi-weights", type=str)
-parser.add_argument("--mimi-weights", type=str)
+parser.add_argument("--tokenizer", type=str, default=loaders.TEXT_TOKENIZER_V0_1,
+                    help="Name of the text tokenizer file in the given HF repo, or path to a local file.")
+parser.add_argument("--moshi-weight", type=str, default=loaders.MOSHIKO_V0_1,
+                    help="Name of the Moshi checkpoint in the given HF repo, or path to a local file.")
+parser.add_argument("--mimi-weight", type=str, default=loaders.MIMI_V0_1,
+                    help="Name of the Mimi checkpoint in the given HF repo, or path to a local file.")
+parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO,
+                    help="HF repo to look into, defaults to Kyutai official one.")
 parser.add_argument("--steps", default=100, type=int)
 parser.add_argument("--profile", action="store_true")
+parser.add_argument("--device", type=str, default='cuda')
 args = parser.parse_args()
@@ -39,52 +43,53 @@ def seed_all(seed):
 
 seed_all(42424242)
 
+tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer, args.hf_repo)
+text_tokenizer = loaders.get_text_tokenizer(tokenizer_path)
 
 print("loading mimi")
-ec = moshi.models.moshi.get_encodec(args.mimi_weights, DEVICE)
+mimi_path = loaders.resolve_model_checkpoint(args.mimi_weight, args.hf_repo)
+mimi = loaders.get_mimi(mimi_path, args.device)
 print("mimi loaded")
 
-text_tokenizer = sentencepiece.SentencePieceProcessor(args.tokenizer)
-
 print("loading moshi")
-lm = moshi.models.moshi.get_lm(args.moshi_weights, DEVICE)
-lm.to(torch.bfloat16)
+moshi_path = loaders.resolve_model_checkpoint(args.moshi_weight, args.hf_repo)
+lm = loaders.get_moshi_lm(moshi_path, args.device)
+lm_gen = LMGen(lm)
 print("lm loaded")
 
-lm_gen = moshi.models.LMGen(lm)
-
 
 def cb(step, total):
     print(f"{step:06d} / {total:06d}", end="\r")
 
 
 def streaming_test(bs):
-    main_audio = []
     main_text = []
+    main_audio = []
+
+    frame_size = int(mimi.sample_rate / mimi.frame_rate)
 
     def run_step():
         start_time = time.time()
         # Chunk should contain the pcm data from the user, single channel with a sample rate of 24000.
-        chunk = torch.zeros((bs, 1, 1920), dtype=torch.float, device=DEVICE)
-        codes = ec.encode(chunk)
+        chunk = torch.zeros((bs, 1, frame_size), dtype=torch.float, device=args.device)
+        codes = mimi.encode(chunk)
         assert codes.shape[-1] == 1
-        for c in range(codes.shape[-1]):
-            be = time.time()
-            ev = torch.cuda.Event(enable_timing=True)
-            ev.record()
-            tokens = lm_gen.step(codes[:, :, c : c + 1])
-            if tokens is None:
-                print("Skipping")
-                return
-            evb = torch.cuda.Event(enable_timing=True)
-            evb.record()
-            dt_step = time.time() - be
-            text_tokens = tokens[:, 0, 0]
-            audio_tokens = tokens[:, 1:, :]
-            main_pcm = ec.decode(audio_tokens)
-            # main_pcm is the audio to be played back to the user, here we just append it and store it in
-            # a file once the loop is finished.
-            main_audio.append(main_pcm[0])
+        be = time.time()
+        ev = torch.cuda.Event(enable_timing=True)
+        ev.record()
+        tokens = lm_gen.step(codes[:, :, :1])
+        if tokens is None:
+            print("Skipping")
+            return
+        evb = torch.cuda.Event(enable_timing=True)
+        evb.record()
+        dt_step = time.time() - be
+        text_tokens = tokens[:, 0, 0]
+        audio_tokens = tokens[:, 1:, :]
+        main_pcm = mimi.decode(audio_tokens)
+        # main_pcm is the audio to be played back to the user, here we just append it and store it in
+        # a file once the loop is finished.
+        main_audio.append(main_pcm[0])
         evb.synchronize()
         dg = ev.elapsed_time(evb)
         torch.cuda.synchronize()
@@ -109,17 +114,17 @@ def run_step():
             run_step()
         print()
         prof.export_chrome_trace("trace.json")
-    main_audio = torch.cat(main_audio, dim=-1)
-    print(main_audio.shape)
+    main_audio_th = torch.cat(main_audio, dim=-1)
+    print(main_audio_th.shape)
     print("generated text:")
     print("".join(main_text))
     sphn.write_wav(
-        "gen_main.wav", main_audio[0].cpu().numpy().astype(np.float32), SAMPLE_RATE
+        "gen_main.wav", main_audio_th[0].cpu().numpy().astype(np.float32), mimi.sample_rate
     )
 
 
 print("streaming test")
 bs = 1
 with torch.no_grad():
-    with ec.streaming(bs), lm_gen.streaming(bs):
+    with mimi.streaming(bs), lm_gen.streaming(bs):
         streaming_test(bs)
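
For context on the loader changes in PATCH 4/9 above: with `allow_local_file` now
defaulting to True, a name that matches an existing local file is used as-is, and
otherwise the checkpoint is fetched from the HF repo. A small sketch of the resulting
loading flow, using only names that appear in the diffs (the device selection line is
illustrative):

    import torch

    from moshi.models import loaders

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Resolve against the Kyutai repo, or a local file if one exists under that name.
    mimi_path = loaders.resolve_model_checkpoint(loaders.MIMI_V0_1, loaders.HF_REPO)
    mimi = loaders.get_mimi(mimi_path, device)

    tokenizer_path = loaders.resolve_model_checkpoint(loaders.TEXT_TOKENIZER_V0_1, loaders.HF_REPO)
    text_tokenizer = loaders.get_text_tokenizer(tokenizer_path)
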
From 5b4ebe107a651256033008769b1cb29948fce266 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 17:53:39 +0100
Subject: [PATCH 5/9] chore: update config

---
 rust/moshi-backend/config-q8.json | 2 +-
 rust/moshi-backend/config.json    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rust/moshi-backend/config-q8.json b/rust/moshi-backend/config-q8.json
index 1f0ad96..4e45147 100644
--- a/rust/moshi-backend/config-q8.json
+++ b/rust/moshi-backend/config-q8.json
@@ -1,6 +1,6 @@
 {
   "instance_name": "foo",
-  "hf_repo": "kmhf/msh-v0.1",
+  "hf_repo": "kmhf/moshi-v0.1",
   "lm_model_file": "$HOME/tmp/moshiko_rs_301e30bf@120.q8.gguf",
   "text_tokenizer_file": "$HOME/tmp/tokenizer_spm_32k_3.model",
   "log_dir": "$HOME/tmp/moshi-logs",
diff --git a/rust/moshi-backend/config.json b/rust/moshi-backend/config.json
index 3a7fa9d..108b2c2 100644
--- a/rust/moshi-backend/config.json
+++ b/rust/moshi-backend/config.json
@@ -1,6 +1,6 @@
 {
   "instance_name": "foo",
-  "hf_repo": "kmhf/msh-v0.1",
+  "hf_repo": "kmhf/moshi-v0.1",
   "lm_model_file": "$HOME/tmp/moshiko_rs_301e30bf@120.safetensors",
   "text_tokenizer_file": "$HOME/tmp/tokenizer_spm_32k_3.model",
   "log_dir": "$HOME/tmp/moshi-logs",

From ea5228caf7c36653a665ac14b682d8aa0460b65c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexandre=20D=C3=A9fossez?=
Date: Tue, 17 Sep 2024 19:10:15 +0200
Subject: [PATCH 6/9] fix

---
 .github/workflows/precommit.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml
index 643e3bc..51ca7ba 100644
--- a/.github/workflows/precommit.yml
+++ b/.github/workflows/precommit.yml
@@ -1,4 +1,4 @@
-name: precommmit
+name: precommit
 on:
   push:
     branches: [ main ]
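
Looking back at the transformer change in PATCH 3/9: the `ExitStack` enters
`no_compile()` only for non-CUDA inputs, so compiled kernels stay in use on GPU while
CPU inference skips torch.compile entirely. A rough standalone equivalent of that
pattern, assuming only `no_compile` from the patch (the `layer` argument is a
placeholder for any module or callable):

    from contextlib import ExitStack

    import torch

    from moshi.utils.compile import no_compile


    def forward_maybe_uncompiled(layer, x: torch.Tensor) -> torch.Tensor:
        with ExitStack() as stack:
            if x.device.type != 'cuda':
                # Skip torch.compile off-GPU, mirroring StreamingTransformerLayer.forward.
                stack.enter_context(no_compile())
            return layer(x)
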
From 91dc799ef3560cba3525118244581509e1719ca0 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 21:23:32 +0100
Subject: [PATCH 7/9] chore: auto-open browser

---
 moshi_mlx/moshi_mlx/local_web.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/moshi_mlx/moshi_mlx/local_web.py b/moshi_mlx/moshi_mlx/local_web.py
index 2e22aba..b63a9e6 100644
--- a/moshi_mlx/moshi_mlx/local_web.py
+++ b/moshi_mlx/moshi_mlx/local_web.py
@@ -17,6 +17,7 @@
 import sphn
 import aiohttp
 from aiohttp import web
+import webbrowser
 
 import mlx.core as mx
 import mlx.nn as nn
@@ -334,6 +335,10 @@ async def handle_root(_):
     runner = web.AppRunner(app)
     await runner.setup()
     site = web.TCPSite(runner, args.host, args.port)
+
+    log("info", f"opening browser at http://{args.host}:{args.port}")
+    webbrowser.open(f"http://{args.host}:{args.port}")
+
     await asyncio.gather(
         recv_loop(), send_loop(), recv_loop2(), send_loop2(), site.start()
     )

From e64bd8ad339a6c13966e891a0fc40111e80a5375 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 21:34:40 +0100
Subject: [PATCH 8/9] chore: add --no-browser flag

---
 moshi_mlx/moshi_mlx/local_web.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/moshi_mlx/moshi_mlx/local_web.py b/moshi_mlx/moshi_mlx/local_web.py
index b63a9e6..07b785b 100644
--- a/moshi_mlx/moshi_mlx/local_web.py
+++ b/moshi_mlx/moshi_mlx/local_web.py
@@ -336,8 +336,9 @@ async def handle_root(_):
     await runner.setup()
     site = web.TCPSite(runner, args.host, args.port)
 
-    log("info", f"opening browser at http://{args.host}:{args.port}")
-    webbrowser.open(f"http://{args.host}:{args.port}")
+    if not args.no_browser:
+        log("info", f"opening browser at http://{args.host}:{args.port}")
+        webbrowser.open(f"http://{args.host}:{args.port}")
 
     await asyncio.gather(
         recv_loop(), send_loop(), recv_loop2(), send_loop2(), site.start()
@@ -361,6 +362,7 @@ def main():
     parser.add_argument("--static", type=str)
     parser.add_argument("--host", default="localhost", type=str)
    parser.add_argument("--port", default=8998, type=int)
+    parser.add_argument("--no-browser", action="store_true")
 
     args = parser.parse_args()

From e0b35345da495913170f43807b66e0177f43e85f Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 17 Sep 2024 21:59:31 +0100
Subject: [PATCH 9/9] chore: favicon

---
 client/index.html                      |   2 ++
 client/public/assets/favicon-16x16.png | Bin 0 -> 1166 bytes
 client/public/assets/favicon-32x32.png | Bin 0 -> 1485 bytes
 client/public/assets/favicon.ico       | Bin 0 -> 7406 bytes
 4 files changed, 2 insertions(+)
 create mode 100644 client/public/assets/favicon-16x16.png
 create mode 100644 client/public/assets/favicon-32x32.png
 create mode 100644 client/public/assets/favicon.ico

diff --git a/client/index.html b/client/index.html
index 3a42de7..7ca0634 100644
--- a/client/index.html
+++ b/client/index.html
@@ -3,6 +3,8 @@
+    <link rel="icon" type="image/png" sizes="32x32" href="/assets/favicon-32x32.png" />
+    <link rel="icon" type="image/png" sizes="16x16" href="/assets/favicon-16x16.png" />
     <title>moshi.chat</title>
diff --git a/client/public/assets/favicon-16x16.png b/client/public/assets/favicon-16x16.png
new file mode 100644
index 0000000000000000000000000000000000000000..79151b3f11ffc7c5ece53dc6bff28b05dcb223f9
GIT binary patch
literal 1166
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001

diff --git a/client/public/assets/favicon-32x32.png b/client/public/assets/favicon-32x32.png
new file mode 100644
index 0000000000000000000000000000000000000000..45770ad6648e43348a849405a0052e63cd635cc1
GIT binary patch
literal 1485
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001

diff --git a/client/public/assets/favicon.ico b/client/public/assets/favicon.ico
new file mode 100644
GIT binary patch
literal 7406
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001