diff --git a/moshi/README.md b/moshi/README.md new file mode 100644 index 0000000..022ce8d --- /dev/null +++ b/moshi/README.md @@ -0,0 +1 @@ +# moshi - pytorch diff --git a/moshi/moshi/client.py b/moshi/moshi/client.py index 2872738..cba9048 100644 --- a/moshi/moshi/client.py +++ b/moshi/moshi/client.py @@ -1,16 +1,17 @@ # Copyright (c) Kyutai, all rights reserved. # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. +"""Client for the Moshi server.""" import argparse import asyncio import queue import sys +import aiohttp import numpy as np import sphn import sounddevice as sd -import aiohttp from .client_utils import AnyPrinter, Printer, RawPrinter @@ -141,7 +142,27 @@ async def run(self) -> None: async def run(printer: AnyPrinter, args): - uri = f"ws://{args.host}:{args.port}/api/chat" + if args.url is None: + proto = "ws" + if args.https: + proto += "s" + uri = f"{proto}://{args.host}:{args.port}/api/chat" + else: + proto = "wss" + if '://' in args.url: + proto, without_proto = args.url.split('://', 1) + if proto in ['ws', 'http']: + proto = "ws" + elif proto in ['wss', 'https']: + proto = "wss" + else: + printer.log("error", f"The provided URL {args.url} seems to contain a protocol but it is unknown.") + sys.exit(1) + else: + without_proto = args.url + uri = f"{proto}://{without_proto}/api/chat" + + printer.log("info", f"Connecting to {uri}.") async with aiohttp.ClientSession() as session: async with session.ws_connect(uri) as ws: printer.log("info", "connected!") @@ -152,8 +173,11 @@ async def run(printer: AnyPrinter, args): def main(): parser = argparse.ArgumentParser("client_opus") - parser.add_argument("--host", default="localhost", type=str) - parser.add_argument("--port", default=8998, type=int) + parser.add_argument("--host", default="localhost", type=str, help="Hostname to connect to.") + parser.add_argument("--port", default=8998, type=int, help="Port to connect to.") + parser.add_argument("--https", action='store_true', + help="Set this flag to use an HTTPS connection.") + parser.add_argument("--url", type=str, help='Directly provide a URL, e.g. to a gradio tunnel.') args = parser.parse_args() printer: AnyPrinter diff --git a/moshi/moshi/client_utils.py b/moshi/moshi/client_utils.py index c7aaa92..0bc37f5 100644 --- a/moshi/moshi/client_utils.py +++ b/moshi/moshi/client_utils.py @@ -1,6 +1,8 @@ # Copyright (c) Kyutai, all rights reserved. # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. +"""Utilities for the command line client, in particular for handling interactions with the terminal. """ from dataclasses import dataclass import sys @@ -14,11 +16,11 @@ def colorize(text, color): def make_log(level: str, msg: str) -> str: if level == "warning": - prefix = colorize("Warning:", "1;31") + prefix = colorize("[Warn]", "1;31") elif level == "info": - prefix = colorize("Info:", "1;34") + prefix = colorize("[Info]", "1;34") elif level == "error": - prefix = colorize("Error:", "1;31") + prefix = colorize("[Err ]", "1;31") else: raise ValueError(f"Unknown level {level}") return prefix + " " + msg diff --git a/moshi/moshi/models/__init__.py b/moshi/moshi/models/__init__.py index 1fcf526..5501848 100644 --- a/moshi/moshi/models/__init__.py +++ b/moshi/moshi/models/__init__.py @@ -1,20 +1,14 @@ # Copyright (c) Kyutai, all rights reserved.
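As a standalone illustration of the URL handling added to client.py above: the client now accepts either --host/--port/--https or a full --url (for instance a gradio tunnel), and normalizes any http/https/ws/wss scheme onto a websocket URI ending in /api/chat. A minimal sketch of that mapping, using a hypothetical helper name rather than the client's inline code:

    def to_chat_uri(url: str | None, host: str = "localhost", port: int = 8998, https: bool = False) -> str:
        # Hypothetical helper mirroring the logic added to moshi/client.py.
        if url is None:
            proto = "wss" if https else "ws"
            return f"{proto}://{host}:{port}/api/chat"
        proto = "wss"  # explicit URLs default to a secure websocket
        if "://" in url:
            scheme, rest = url.split("://", 1)
            if scheme in ("ws", "http"):
                proto = "ws"
            elif scheme in ("wss", "https"):
                proto = "wss"
            else:
                raise ValueError(f"Unknown protocol in {url}")
        else:
            rest = url
        return f"{proto}://{rest}/api/chat"

    print(to_chat_uri("https://example.org"))  # wss://example.org/api/chat
    print(to_chat_uri(None, https=False))      # ws://localhost:8998/api/chat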
# This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. """ -Models for EnCodec, AudioGen, MusicGen, as well as the generic LMModel. +Models for the compression model Mimi and the language model Moshi. """ # flake8: noqa -from .encodec import ( +from .compression import ( CompressionModel, - EncodecModel, + MimiModel, ) from .lm import LMModel, LMGen -from .moshi_ import get_encodec, get_lm +from .loaders import get_mimi, get_moshi_lm diff --git a/moshi/moshi/models/compression.py b/moshi/moshi/models/compression.py index 02d1723..7a790d5 100644 --- a/moshi/moshi/models/compression.py +++ b/moshi/moshi/models/compression.py @@ -2,13 +2,15 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. +# Part of this file is adapted from encodec.py in https://github.com/facebookresearch/audiocraft +# released under the following license. # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. -"""Compression models or wrapper around existing models. -Also defines the main interface that a model must follow to be usable as an audio tokenizer. +"""Compression models or wrapper around existing models. In particular, provides the implementation +for Mimi. Also defines the main interface that a model must follow to be usable as an audio tokenizer. """ from abc import abstractmethod @@ -19,7 +21,6 @@ import torch from torch import nn -from torch.nn import functional as F from ..quantization import ( @@ -46,12 +47,12 @@ def forward(self, x: torch.Tensor) -> QuantizedResult: ... @abstractmethod def encode(self, x: torch.Tensor) -> torch.Tensor: - """See `EncodecModel.encode`.""" + """See `MimiModel.encode`.""" ... @abstractmethod def decode(self, codes: torch.Tensor) -> torch.Tensor: - """See `EncodecModel.decode`.""" + """See `MimiModel.decode`.""" ... @abstractmethod @@ -90,7 +91,7 @@ def set_num_codebooks(self, n: int): @dataclass -class _EncodecState: +class _MimiState: graphed_tr_enc: CUDAGraphed | None graphed_tr_dec: CUDAGraphed | None @@ -98,8 +99,8 @@ def reset(self): pass -class EncodecModel(CompressionModel[_EncodecState]): - """Encodec model operating on the raw waveform. +class MimiModel(CompressionModel[_MimiState]): + """Mimi model operating on the raw waveform. Args: encoder (nn.Module): Encoder network. @@ -122,6 +123,7 @@ class EncodecModel(CompressionModel[_EncodecState]): torch_compile_encoder_decoder (bool): if True, uses torch.compile on the encoder / decoder. Deactivated by default for training as this is incompatible at the moment with weight norm. See https://github.com/pytorch/pytorch/issues/121902 + Also this seems to work well with 2.2.0, but completely fails with 2.4.0.
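To make the interface above concrete: with the frame rate and codebook count fixed elsewhere in this patch (FRAME_RATE = 12.5 in loaders.py, and set_num_codebooks(8) in get_mimi), Mimi's token budget per second of audio is small and fixed. A quick back-of-the-envelope check, assuming those values:

    sample_rate = 24000    # Hz, SAMPLE_RATE in loaders.py
    frame_rate = 12.5      # Mimi frames per second
    num_codebooks = 8      # set_num_codebooks(8) in get_mimi

    frame_size = int(sample_rate / frame_rate)        # 1920 samples, i.e. 80 ms per frame
    tokens_per_second = frame_rate * num_codebooks    # 100.0 audio tokens per second per stream
    print(frame_size, tokens_per_second)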
""" def __init__( @@ -217,14 +219,16 @@ def __init__( channel_wise=upsample_channel_wise_bug, ) - def _init_streaming_state(self, batch_size: int) -> _EncodecState: + def _init_streaming_state(self, batch_size: int) -> _MimiState: + device = next(self.parameters()).device + disable = device.type != 'cuda' graphed_tr_dec = None graphed_tr_enc = None if self.encoder_transformer is not None: - graphed_tr_enc = CUDAGraphed(self.encoder_transformer) + graphed_tr_enc = CUDAGraphed(self.encoder_transformer, disable=disable) if self.decoder_transformer is not None: - graphed_tr_dec = CUDAGraphed(self.decoder_transformer) - return _EncodecState(graphed_tr_enc, graphed_tr_dec) + graphed_tr_dec = CUDAGraphed(self.decoder_transformer, disable=disable) + return _MimiState(graphed_tr_enc, graphed_tr_dec) @property def channels(self) -> int: @@ -368,7 +372,8 @@ def encode(self, x: torch.Tensor) -> torch.Tensor: x (torch.Tensor): Float tensor of shape [B, C, T] Returns: - codes (torch.Tensor): an int tensor of shape [B, K, T] with K the number of codebooks used and T the timestep. + codes (torch.Tensor): an int tensor of shape [B, K, T] + with K the number of codebooks used and T the timestep. """ emb = self._encode_to_unquantized_latent(x) codes = self.quantizer.encode(emb) diff --git a/moshi/moshi/models/lm.py b/moshi/moshi/models/lm.py index 58ad922..8cca181 100644 --- a/moshi/moshi/models/lm.py +++ b/moshi/moshi/models/lm.py @@ -372,8 +372,9 @@ def _init_streaming_state(self, batch_size: int) -> _LMGenState: dtype=torch.long, ) - graphed_main = CUDAGraphed(lm_model.forward_text) - graphed_depth = CUDAGraphed(self.depformer_step) + disable = lm_model.device.type != 'cuda' + graphed_main = CUDAGraphed(lm_model.forward_text, disable=disable) + graphed_depth = CUDAGraphed(self.depformer_step, disable=disable) return _LMGenState(cache, initial, graphed_main, graphed_depth) diff --git a/moshi/moshi/models/loaders.py b/moshi/moshi/models/loaders.py index d136200..1917694 100644 --- a/moshi/moshi/models/loaders.py +++ b/moshi/moshi/models/loaders.py @@ -1,21 +1,29 @@ # Copyright (c) Kyutai, all rights reserved. # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. 
+"""Retrieves the pretrained models for Moshi and Mimi.""" +from pathlib import Path -from ..modules import SEANetEncoder, SEANetDecoder, transformer -from .encodec import EncodecModel +from huggingface_hub import hf_hub_download +from safetensors.torch import load_model +import sentencepiece +import torch + +from .compression import MimiModel from .lm import LMModel +from ..modules import SEANetEncoder, SEANetDecoder, transformer from ..quantization import SplitResidualVectorQuantizer -import torch -from safetensors.torch import load_model -from pathlib import Path -import typing as tp SAMPLE_RATE = 24000 FRAME_RATE = 12.5 +HF_REPO = 'kmhf/msh-v0.1' +MIMI_V0_1 = 'tokenizer-e351c8d8-checkpoint125.safetensors' +MOSHIKO_V0_1 = 'moshiko_pt_301e30bf@120.safetensors' +MOSHIKA_V0_1 = 'moshika_pt_3d736a96@120.safetensors' +TEXT_TOKENIZER_V0_1 = 'tokenizer_spm_32k_3.model' -seanet_kwargs = { +_seanet_kwargs = { "channels": 1, "dimension": 512, "causal": True, @@ -35,15 +43,15 @@ "ratios": [8, 6, 5, 4], "true_skip": True, } -quantizer_kwargs = { +_quantizer_kwargs = { "dimension": 256, "n_q": 32, "bins": 2048, - "input_dimension": seanet_kwargs["dimension"], - "output_dimension": seanet_kwargs["dimension"], + "input_dimension": _seanet_kwargs["dimension"], + "output_dimension": _seanet_kwargs["dimension"], } -transformer_kwargs = { - "d_model": seanet_kwargs["dimension"], +_transformer_kwargs = { + "d_model": _seanet_kwargs["dimension"], "num_heads": 8, "num_layers": 8, "causal": True, @@ -55,17 +63,17 @@ "norm": "layer_norm", "positional_embedding": "rope", "dim_feedforward": 2048, - "input_dimension": seanet_kwargs["dimension"], - "output_dimensions": [seanet_kwargs["dimension"]], + "input_dimension": _seanet_kwargs["dimension"], + "output_dimensions": [_seanet_kwargs["dimension"]], } -lm_kwargs = { +_lm_kwargs = { "dim": 4096, "text_card": 32000, "existing_text_padding_id": 3, "n_q": 16, "dep_q": 8, - "card": quantizer_kwargs["bins"], + "card": _quantizer_kwargs["bins"], "num_heads": 32, "num_layers": 32, "hidden_scale": 4.125, @@ -92,24 +100,40 @@ } -def _is_safetensors(filename: tp.Union[str, Path]) -> bool: - filename = Path(filename) - return filename.suffix in (".safetensors", ".sft", ".sfts") +def _is_safetensors(path: Path | str) -> bool: + return Path(path).suffix in (".safetensors", ".sft", ".sfts") -def get_encodec(filename: tp.Union[str, Path], device): - encoder = SEANetEncoder(**seanet_kwargs) - decoder = SEANetDecoder(**seanet_kwargs) +def resolve_model_checkpoint(name: str, hf_repo: str = HF_REPO, allow_local_file: bool = True) -> Path: + """Load a model checkpoint from HF. + If `allow_local_file` is True, then if a file `name` exists, it will be used instead. 
+ """ + if allow_local_file and Path(name).exists(): + return Path(name) + else: + filename = name + return Path(hf_hub_download(hf_repo, filename)) + + +def get_text_tokenizer(filename: str | Path) -> sentencepiece.SentencePieceProcessor: + return sentencepiece.SentencePieceProcessor(str(filename)) # type: ignore + + +def get_mimi(filename: str | Path, + device: torch.device | str = 'cpu') -> MimiModel: + """Return a pretrained Mimi model.""" + encoder = SEANetEncoder(**_seanet_kwargs) + decoder = SEANetDecoder(**_seanet_kwargs) encoder_transformer = transformer.ProjectedTransformer( - device=device, **transformer_kwargs + device=device, **_transformer_kwargs ) decoder_transformer = transformer.ProjectedTransformer( - device=device, **transformer_kwargs + device=device, **_transformer_kwargs ) quantizer = SplitResidualVectorQuantizer( - **quantizer_kwargs, + **_quantizer_kwargs, ) - model = EncodecModel( + model = MimiModel( encoder, decoder, quantizer, @@ -126,21 +150,19 @@ def get_encodec(filename: tp.Union[str, Path], device): if _is_safetensors(filename): load_model(model, filename) else: - pkg = torch.load( - filename, - "cpu", - ) + pkg = torch.load(filename, "cpu") model.load_state_dict(pkg["model"]) model.set_num_codebooks(8) return model -def get_lm(filename: tp.Union[str, Path], device): +def get_moshi_lm(filename: str | Path, + device: torch.device | str = 'cpu') -> LMModel: dtype = torch.bfloat16 model = LMModel( device=device, dtype=dtype, - **lm_kwargs, + **_lm_kwargs, ).to(device=device, dtype=dtype) model.eval() if _is_safetensors(filename): diff --git a/moshi/moshi/modules/seanet.py b/moshi/moshi/modules/seanet.py index 0fe706b..1d8ff28 100644 --- a/moshi/moshi/modules/seanet.py +++ b/moshi/moshi/modules/seanet.py @@ -159,8 +159,7 @@ def __init__( self.n_blocks = len(self.ratios) + 2 # first and last conv + residual blocks self.disable_norm_outer_blocks = disable_norm_outer_blocks assert ( - self.disable_norm_outer_blocks >= 0 - and self.disable_norm_outer_blocks <= self.n_blocks + self.disable_norm_outer_blocks >= 0 and self.disable_norm_outer_blocks <= self.n_blocks ), ( "Number of blocks for which to disable norm is invalid." "It should be lower or equal to the actual number of blocks in the network and greater or equal to 0." @@ -307,8 +306,7 @@ def __init__( self.n_blocks = len(self.ratios) + 2 # first and last conv + residual blocks self.disable_norm_outer_blocks = disable_norm_outer_blocks assert ( - self.disable_norm_outer_blocks >= 0 - and self.disable_norm_outer_blocks <= self.n_blocks + self.disable_norm_outer_blocks >= 0 and self.disable_norm_outer_blocks <= self.n_blocks ), ( "Number of blocks for which to disable norm is invalid." "It should be lower or equal to the actual number of blocks in the network and greater or equal to 0." diff --git a/moshi/moshi/modules/transformer.py b/moshi/moshi/modules/transformer.py index d3d0ede..84d1952 100644 --- a/moshi/moshi/modules/transformer.py +++ b/moshi/moshi/modules/transformer.py @@ -9,6 +9,7 @@ See `StreamingTransformer` for more information. 
""" +from contextlib import ExitStack from dataclasses import dataclass import typing as tp @@ -17,6 +18,7 @@ import torch.nn as nn from torch.nn import functional as F +from ..utils.compile import no_compile from .gating import make_gating from .rope import RotaryEmbedding from .streaming import StreamingModule, StreamingContainer @@ -240,10 +242,7 @@ def reset(self): def complete(self, k: torch.Tensor, v: torch.Tensor) -> KVCacheResult: assert k.shape[:-1] == v.shape[:-1], (k.shape, v.shape) B, H, T, D = k.shape - indexes = ( - torch.arange(T, device=self.end_offset.device, dtype=self.end_offset.dtype) - + self.end_offset - ) + indexes = torch.arange(T, device=self.end_offset.device, dtype=self.end_offset.dtype) + self.end_offset indexes = indexes % self.capacity self.cache[0].index_copy_(2, indexes, k) self.cache[1].index_copy_(2, indexes, v) @@ -485,8 +484,8 @@ def __init__( context=context, rope=rope, weights_per_step=weights_per_step, - **attn_kwargs, - **factory_kwargs, + **attn_kwargs, # type: ignore + **factory_kwargs, # type: ignore ) # type: ignore self.norm1 = create_norm_fn(norm, d_model, **factory_kwargs) self.norm2 = create_norm_fn(norm, d_model, **factory_kwargs) @@ -542,8 +541,8 @@ def __init__( self.layer_scale_1 = nn.Identity() self.layer_scale_2 = nn.Identity() else: - self.layer_scale_1 = LayerScale(d_model, layer_scale, **factory_kwargs) - self.layer_scale_2 = LayerScale(d_model, layer_scale, **factory_kwargs) + self.layer_scale_1 = LayerScale(d_model, layer_scale, **factory_kwargs) # type: ignore + self.layer_scale_2 = LayerScale(d_model, layer_scale, **factory_kwargs) # type: ignore def _init_streaming_state(self, batch_size: int) -> _LayerState: return _LayerState(offset_cpu=0) @@ -582,12 +581,15 @@ def _sa_block(self, x: torch.Tensor): return x_orig + self.layer_scale_1(update) def forward(self, x: torch.Tensor): - x = self._sa_block(x) - x = self._ff_block(x) - state = self._streaming_state - if state: - state.offset_cpu += x.shape[1] - return x + with ExitStack() as stack: + if x.device.type != 'cuda': + stack.enter_context(no_compile()) + x = self._sa_block(x) + x = self._ff_block(x) + state = self._streaming_state + if state: + state.offset_cpu += x.shape[1] + return x @dataclass diff --git a/moshi/moshi/quantization/base.py b/moshi/moshi/quantization/base.py index e8f0ad4..02228a9 100644 --- a/moshi/moshi/quantization/base.py +++ b/moshi/moshi/quantization/base.py @@ -68,7 +68,7 @@ def num_codebooks(self) -> int: raise NotImplementedError() @property - def semantic_quantizer(self): + def semantic_quantizer(self) -> 'BaseQuantizer': """This returns the quantizer that models the first level of the hierarchy (typically semantic). In this case, it's the quantizer itself. @@ -76,7 +76,7 @@ def semantic_quantizer(self): return self @property - def acoustic_quantizer(self): + def acoustic_quantizer(self) -> 'BaseQuantizer': """This returns the quantizer that models the higher levels of the hierarchy (typically acoustic). In this case, it's the quantizer itself. diff --git a/moshi/moshi/quantization/core_vq.py b/moshi/moshi/quantization/core_vq.py index 670b3a9..54abb5b 100644 --- a/moshi/moshi/quantization/core_vq.py +++ b/moshi/moshi/quantization/core_vq.py @@ -8,10 +8,9 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. 
-import math import typing as tp -from einops import rearrange, repeat +from einops import rearrange import torch from torch import nn from torch import distributed @@ -339,7 +338,7 @@ def forward( n_q = n_q or len(self.layers) previous_layer_is_initialized = True - for i, layer in enumerate(self.layers[:n_q]): + for i, layer in enumerate(self.layers[:n_q]): # type: ignore quantized, codes, loss, metrics = layer( residual, initialize=previous_layer_is_initialized ) @@ -366,7 +365,7 @@ def encode(self, x: torch.Tensor, n_q: tp.Optional[int] = None) -> torch.Tensor: residual = x all_indices = [] n_q = n_q or len(self.layers) - for layer in self.layers[:n_q]: + for layer in self.layers[:n_q]: # type: ignore indices = layer.encode(residual) quantized = layer.decode(indices) residual = residual - quantized diff --git a/moshi/moshi/quantization/vq.py b/moshi/moshi/quantization/vq.py index 0e436c1..4fa5b0a 100644 --- a/moshi/moshi/quantization/vq.py +++ b/moshi/moshi/quantization/vq.py @@ -321,12 +321,12 @@ def dimension(self): return self.rvq_first.dimension @property - def semantic_quantizer(self): + def semantic_quantizer(self) -> ResidualVectorQuantizer: """This returns the quantizer that models the first level of the hierarchy (typically semantic).""" return self.rvq_first @property - def acoustic_quantizer(self): + def acoustic_quantizer(self) -> ResidualVectorQuantizer: """This returns the quantizer that models the higher levels of the hierarchy (typically acoustic).""" return self.rvq_rest diff --git a/moshi/moshi/server.py b/moshi/moshi/server.py index 70dfbff..a67a332 100644 --- a/moshi/moshi/server.py +++ b/moshi/moshi/server.py @@ -5,67 +5,29 @@ import argparse import asyncio from dataclasses import dataclass -from pathlib import Path import random +import os +from pathlib import Path import tarfile import time +import secrets +import sys -import os +import aiohttp +from aiohttp import web +from huggingface_hub import hf_hub_download import numpy as np import sentencepiece import sphn import torch -import aiohttp -from aiohttp import web - -from huggingface_hub import hf_hub_download - -from .models import moshi_, EncodecModel, LMGen -SAMPLE_RATE = moshi_.SAMPLE_RATE -DEVICE = "cuda:0" -ENABLE_PROFILING = False - -def colorize(text, color): - code = f"\033[{color}m" - restore = "\033[0m" - return "".join([code, text, restore]) +from .client_utils import make_log +from .models import loaders, MimiModel, LMModel, LMGen def log(level: str, msg: str): - if level == "warning": - prefix = colorize("[Warn]", "1;31") - elif level == "info": - prefix = colorize("[Info]", "1;34") - elif level == "error": - prefix = colorize("[Err ]", "1;31") - else: - raise ValueError(f"Unknown level {level}") - print(prefix + " " + msg) - - -parser = argparse.ArgumentParser() -parser.add_argument("--host", default="localhost", type=str) -parser.add_argument("--port", default=8998, type=int) -parser.add_argument("--static", type=str) -parser.add_argument("--tokenizer", type=str) -parser.add_argument("--moshi-weights", type=str) -parser.add_argument("--mimi-weights", type=str) -parser.add_argument("--hf-repo", type=str, default="kmhf/msh-v0.1") - -args = parser.parse_args() - -if args.tokenizer is None: - args.tokenizer = hf_hub_download(args.hf_repo, "tokenizer_spm_32k_3.model") -if args.moshi_weights is None: - args.moshi_weights = hf_hub_download( - args.hf_repo, "moshiko_pt_301e30bf@120.safetensors" - ) -if args.mimi_weights is None: - args.mimi_weights = hf_hub_download( - args.hf_repo, 
"tokenizer-e351c8d8-checkpoint125.safetensors" - ) + print(make_log(level, msg)) def seed_all(seed): @@ -79,43 +41,35 @@ def seed_all(seed): torch.backends.cudnn.benchmark = False -seed_all(42424242) - - @dataclass class ServerState: - ec: EncodecModel + mimi: MimiModel text_tokenizer: sentencepiece.SentencePieceProcessor lm_gen: LMGen lock: asyncio.Lock - def __init__(self): - log("info", "loading mimi") - self.ec = moshi_.get_encodec(args.mimi_weights, DEVICE) - log("info", "mimi loaded") - self.text_tokenizer = sentencepiece.SentencePieceProcessor(args.tokenizer) - log("info", "loading moshi") - lm = moshi_.get_lm(args.moshi_weights, DEVICE) + def __init__(self, mimi: MimiModel, text_tokenizer: sentencepiece.SentencePieceProcessor, + lm: LMModel, device: str | torch.device): + self.mimi = mimi + self.text_tokenizer = text_tokenizer self.lm_gen = LMGen(lm) - self.frame_size = int(self.ec.sample_rate / self.ec.frame_rate) + self.device = device + self.frame_size = int(self.mimi.sample_rate / self.mimi.frame_rate) self.lock = asyncio.Lock() - self.ec.streaming_forever(1) + self.mimi.streaming_forever(1) self.lm_gen.streaming_forever(1) - log("info", "lm loaded") def warmup(self): for chunk in range(4): - chunk = torch.zeros( - 1, 1, self.frame_size, dtype=torch.float32, device=DEVICE - ) - codes = self.ec.encode(chunk) + chunk = torch.zeros(1, 1, self.frame_size, dtype=torch.float32, device=self.device) + codes = self.mimi.encode(chunk) for c in range(codes.shape[-1]): - tokens = self.lm_gen.step(codes[:, :, c : c + 1]) + tokens = self.lm_gen.step(codes[:, :, c: c + 1]) if tokens is None: continue - _ = self.ec.decode(tokens[:, 1:]) + _ = self.mimi.decode(tokens[:, 1:]) torch.cuda.synchronize() async def handle_chat(self, request): @@ -168,21 +122,21 @@ async def opus_loop(): while all_pcm_data.shape[-1] >= self.frame_size: be = time.time() chunk = all_pcm_data[: self.frame_size] - all_pcm_data = all_pcm_data[self.frame_size :] + all_pcm_data = all_pcm_data[self.frame_size:] chunk = torch.from_numpy(chunk) - chunk = chunk.to(device=DEVICE)[None, None] - codes = self.ec.encode(chunk) + chunk = chunk.to(device=self.device)[None, None] + codes = self.mimi.encode(chunk) for c in range(codes.shape[-1]): - tokens = self.lm_gen.step(codes[:, :, c : c + 1]) + tokens = self.lm_gen.step(codes[:, :, c: c + 1]) if tokens is None: continue assert tokens.shape[1] == self.lm_gen.lm_model.dep_q + 1 - main_pcm = self.ec.decode(tokens[:, 1:]) + main_pcm = self.mimi.decode(tokens[:, 1:]) main_pcm = main_pcm.cpu() opus_writer.append_pcm(main_pcm[0, 0].numpy()) text_token = tokens[0, 0, 0].item() if text_token not in (0, 3): - _text = self.text_tokenizer.id_to_piece(text_token) + _text = self.text_tokenizer.id_to_piece(text_token) # type: ignore _text = _text.replace("▁", " ") msg = b"\x02" + bytes(_text, encoding="utf8") log("info", f"text token '{_text}'") @@ -201,9 +155,9 @@ async def send_loop(): log("info", "accepted connection") close = False async with self.lock: - opus_writer = sphn.OpusStreamWriter(self.ec.sample_rate) - opus_reader = sphn.OpusStreamReader(self.ec.sample_rate) - self.ec.reset_streaming() + opus_writer = sphn.OpusStreamWriter(self.mimi.sample_rate) + opus_reader = sphn.OpusStreamReader(self.mimi.sample_rate) + self.mimi.reset_streaming() self.lm_gen.reset_streaming() # Send the handshake. 
await ws.send_bytes(b"\x00") @@ -213,14 +167,63 @@ async def send_loop(): def main(): - state = ServerState() + parser = argparse.ArgumentParser() + parser.add_argument("--host", default="localhost", type=str) + parser.add_argument("--port", default=8998, type=int) + parser.add_argument("--static", type=str) + parser.add_argument("--gradio_tunnel", action='store_true', help='Activate a gradio tunnel.') + parser.add_argument("--gradio_tunnel_token", + help='Provide a custom (secret) token here to keep getting the same URL.') + + parser.add_argument("--tokenizer", type=str, default=loaders.TEXT_TOKENIZER_V0_1, + help="Name of the text tokenizer file in the given HF repo, or path to a local file.") + parser.add_argument("--moshi-weight", type=str, default=loaders.MOSHIKO_V0_1, + help="Name of the Moshi checkpoint in the given HF repo, or path to a local file.") + parser.add_argument("--mimi-weight", type=str, default=loaders.MIMI_V0_1, + help="Name of the Mimi checkpoint in the given HF repo, or path to a local file.") + parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO, + help="HF repo to look into, defaults to Kyutai official one.") + parser.add_argument("--device", type=str, default="cuda", help="Device on which to run, defaults to 'cuda'.") + + args = parser.parse_args() + seed_all(42424242) + + setup_tunnel = None + tunnel_token = '' + if args.gradio_tunnel: + try: + from gradio import networking # type: ignore + except ImportError: + log("error", "Cannot find gradio which is required to activate a tunnel. " + "Please install with `pip install gradio`.") + sys.exit(1) + setup_tunnel = networking.setup_tunnel + if args.gradio_tunnel_token is None: + tunnel_token = secrets.token_urlsafe(32) + else: + tunnel_token = args.gradio_tunnel_token + + log("info", "loading mimi") + mimi_path = loaders.resolve_model_checkpoint(args.mimi_weight, args.hf_repo) + mimi = loaders.get_mimi(mimi_path, args.device) + log("info", "mimi loaded") + + tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer, args.hf_repo) + text_tokenizer = loaders.get_text_tokenizer(tokenizer_path) + + log("info", "loading moshi") + moshi_path = loaders.resolve_model_checkpoint(args.moshi_weight, args.hf_repo) + lm = loaders.get_moshi_lm(moshi_path, args.device) + log("info", "moshi loaded") + + state = ServerState(mimi, text_tokenizer, lm, args.device) log("info", "warming up the model") state.warmup() app = web.Application() app.router.add_get("/api/chat", state.handle_chat) static_path: None | str = None if args.static is None: - log("info", f"retrieving the static content") + log("info", "retrieving the static content") dist_tgz = hf_hub_download(args.hf_repo, "dist.tgz") dist_tgz = Path(dist_tgz) dist = dist_tgz.parent / "dist" @@ -232,7 +235,6 @@ def main(): # When set to the "none" string, we don't serve any static content. 
static_path = args.static if static_path is not None: - async def handle_root(_): return web.FileResponse(os.path.join(static_path, "index.html")) @@ -241,7 +243,11 @@ async def handle_root(_): app.router.add_static( "/", path=static_path, follow_symlinks=True, name="static" ) - log("info", f"listening to ws://{args.host}:{args.port}") + log("info", f"Access the Web UI directly at http://{args.host}:{args.port}") + if setup_tunnel is not None: + tunnel = setup_tunnel('localhost', args.port, tunnel_token, None) + log("info", f"Tunnel started; if running on a remote GPU, you can use {tunnel}.") + log("info", "Note that this tunnel goes through the US and you might experience high latency in Europe.") web.run_app(app, port=args.port) diff --git a/moshi/moshi/utils/compile.py b/moshi/moshi/utils/compile.py index b47e987..780513b 100644 --- a/moshi/moshi/utils/compile.py +++ b/moshi/moshi/utils/compile.py @@ -23,7 +23,7 @@ @contextmanager def no_compile(): - """Disable torch.compile locally.""" + """Disable torch.compile locally. Now PyTorch 2.4 provides a function to do that.""" global _compile_disabled prev_disabled = _compile_disabled @@ -194,11 +194,14 @@ class CUDAGraphed: be top level args, not nested in structures (tuples, dicts, etc). Keyword arguments are NOT supported for simplicity. warmup_steps: how many call to make normally before CUDA Graphing. In particular, this - allows torch.compiled functions to get properly compiled.""" + allows torch.compiled functions to get properly compiled. + disable: if True, just call the func directly; useful to quickly deactivate CUDA graphing on CPU. + """ - def __init__(self, func: tp.Callable, warmup_steps: int = 1): + def __init__(self, func: tp.Callable, warmup_steps: int = 1, disable: bool = False): self.func = func self.warmup_steps = warmup_steps + self.disable = disable self._graph: cuda.CUDAGraph | None = None self._output: tuple | None = None self._args: tuple | None = None @@ -214,7 +217,7 @@ def reset(self, warmup_steps: int = 0) -> None: def __call__(self, *args, **kwargs) -> tp.Any: if kwargs: raise RuntimeError("Named arguments not supported for now.") - if not _is_cuda_graph_enabled() or in_cuda_graph(): + if self.disable or not _is_cuda_graph_enabled() or in_cuda_graph(): return self.func(*args, **kwargs) def _clone_tensors(args: tuple) -> tuple: diff --git a/moshi/pyproject.toml b/moshi/pyproject.toml index 61d4c54..d5b7578 100644 --- a/moshi/pyproject.toml +++ b/moshi/pyproject.toml @@ -1,10 +1,9 @@ [project] name = "moshi" -version = "0.0.1" requires-python = ">= 3.10" description = "Moshi is moshi" dependencies = [ - "numpy >= 2.1.0, < 2.2", + "numpy >= 1.26, < 2.2", "safetensors >= 0.4.0, < 0.5", "huggingface-hub >= 0.24, < 0.25", "einops == 0.7", @@ -17,11 +16,18 @@ dependencies = [ authors = [{name="Laurent Mazaré", email="laurent@kyutai.org"}] maintainers = [{name="Laurent Mazaré", email="laurent@kyutai.org"}] license = {text = "MIT"} +dynamic = ["version"] +[tool.setuptools.dynamic] +version = {attr = "moshi.__version__"} [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" -[tool.setuptools] -packages = ["moshi", "moshi.utils", "moshi.modules", "moshi.models", "moshi.quantization"] +[project.optional-dependencies] +dev = [ + "pyright", + "flake8", + "pre-commit", +] diff --git a/moshi/requirements.txt b/moshi/requirements.txt index 9a93905..876de9d 100644 --- a/moshi/requirements.txt +++ b/moshi/requirements.txt @@ -5,5 +5,6 @@ sounddevice==0.5.0 soundfile==0.12.1 sphn==0.1.4 torch==2.2.0 +numpy==1.26.4
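Since pyproject.toml above now declares the version as dynamic and reads it from moshi.__version__, the package __init__ must define that attribute or the build will fail. Presumably something along these lines, with the value being whatever the project chooses (0.0.1 matches the static version that was removed):

    # moshi/moshi/__init__.py (assumed): the source of the dynamic version above.
    __version__ = "0.0.1"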
aiohttp>=3.10.5, <3.11 huggingface-hub==0.24.6 diff --git a/moshi/setup.cfg b/moshi/setup.cfg index dc7aa4b..5bccac4 100644 --- a/moshi/setup.cfg +++ b/moshi/setup.cfg @@ -3,3 +3,4 @@ max-line-length = 120 [flake8] max-line-length = 120 +ignore = E203,E704 diff --git a/scripts/mimi_streaming_test.py b/scripts/mimi_streaming_test.py index d71f07c..54865a3 100644 --- a/scripts/mimi_streaming_test.py +++ b/scripts/mimi_streaming_test.py @@ -3,20 +3,23 @@ # LICENSE file in the root directory of this source tree. import argparse -import moshi +import random import time -import torch + +import numpy as np import sphn +import torch from torch.profiler import profile, ProfilerActivity -import numpy as np -import random -SAMPLE_RATE = moshi.models.moshi.SAMPLE_RATE -DEVICE = "cuda:0" -ENABLE_PROFILING = False +from moshi.models import loaders + parser = argparse.ArgumentParser() -parser.add_argument("--weights", type=str) +parser.add_argument("--weights", type=str, default=loaders.MIMI_V0_1) +parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO) +parser.add_argument("--device", type=str, + default='cuda' if torch.cuda.device_count() else 'cpu') +parser.add_argument("--profile", action='store_true') args = parser.parse_args() @@ -35,23 +38,27 @@ def seed_all(seed): print("loading mimi") -ec = moshi.models.moshi.get_encodec(args.weights, DEVICE) +mimi = loaders.get_mimi( + loaders.resolve_model_checkpoint(args.weights, args.hf_repo), + args.device) print("mimi loaded") -def encodec_streaming_test(ec, pcm_chunk_size=1920, max_duration_sec=10.0): +def mimi_streaming_test(mimi, max_duration_sec=10.0): + pcm_chunk_size = int(mimi.sample_rate / mimi.frame_rate) # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3 sample_pcm, sample_sr = sphn.read("bria.mp3") + sample_rate = mimi.sample_rate print("loaded pcm", sample_pcm.shape, sample_sr) sample_pcm = sphn.resample( - sample_pcm, src_sample_rate=sample_sr, dst_sample_rate=SAMPLE_RATE + sample_pcm, src_sample_rate=sample_sr, dst_sample_rate=sample_rate ) - sample_pcm = torch.tensor(sample_pcm, device=DEVICE) - max_duration_len = int(SAMPLE_RATE * max_duration_sec) + sample_pcm = torch.tensor(sample_pcm, device=args.device) + max_duration_len = int(sample_rate * max_duration_sec) if sample_pcm.shape[-1] > max_duration_len: sample_pcm = sample_pcm[..., :max_duration_len] print("resampled pcm", sample_pcm.shape, sample_sr) - sample_pcm = sample_pcm[None].to(device=DEVICE) + sample_pcm = sample_pcm[None].to(device=args.device) print("streaming encoding...") start_time = time.time() @@ -61,34 +68,34 @@ def run_loop(): for start_idx in range(0, sample_pcm.shape[-1], pcm_chunk_size): end_idx = min(sample_pcm.shape[-1], start_idx + pcm_chunk_size) chunk = sample_pcm[..., start_idx:end_idx] - codes, _scale = ec.encode(chunk) + codes = mimi.encode(chunk) if codes.shape[-1]: print(start_idx, codes.shape, end="\r") all_codes.append(codes) - if ENABLE_PROFILING: + if args.profile: with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof: run_loop() prof.export_chrome_trace("trace.json") else: run_loop() - all_codes = torch.cat(all_codes, dim=-1) - print(f"codes {all_codes.shape} generated in {time.time() - start_time:.2f}s") + all_codes_th = torch.cat(all_codes, dim=-1) + print(f"codes {all_codes_th.shape} generated in {time.time() - start_time:.2f}s") print("streaming decoding...") all_pcms = [] - with ec.streaming(): - for i in range(all_codes.shape[-1]): - codes = all_codes[..., i : i + 1] - pcm = 
ec.decode(codes, scale=None) + with mimi.streaming(1): + for i in range(all_codes_th.shape[-1]): + codes = all_codes_th[..., i : i + 1] + pcm = mimi.decode(codes) print(i, pcm.shape, end="\r") all_pcms.append(pcm) all_pcms = torch.cat(all_pcms, dim=-1) print("pcm", all_pcms.shape, all_pcms.dtype) - sphn.write_wav("streaming_out.wav", all_pcms[0, 0].cpu().numpy(), SAMPLE_RATE) - pcm = ec.decode(all_codes, scale=None) + sphn.write_wav("streaming_out.wav", all_pcms[0, 0].cpu().numpy(), sample_rate) + pcm = mimi.decode(all_codes_th) print("pcm", pcm.shape, pcm.dtype) - sphn.write_wav.write_wav("roundtrip_out.wav", pcm[0, 0].cpu().numpy(), SAMPLE_RATE) + sphn.write_wav("roundtrip_out.wav", pcm[0, 0].cpu().numpy(), sample_rate) with torch.no_grad(): - encodec_streaming_test(ec) + mimi_streaming_test(mimi) diff --git a/scripts/moshi_benchmark.py b/scripts/moshi_benchmark.py index 056542f..0bd015b 100644 --- a/scripts/moshi_benchmark.py +++ b/scripts/moshi_benchmark.py @@ -3,26 +3,30 @@ # LICENSE file in the root directory of this source tree. import argparse -import moshi -import sentencepiece -import torch -import sphn -import numpy as np import random import time +import numpy as np +import sentencepiece +import sphn +import torch from torch.profiler import profile, ProfilerActivity -SAMPLE_RATE = moshi.models.moshi.SAMPLE_RATE -DEVICE = "cuda:0" -ENABLE_PROFILING = False +from moshi.models import loaders, LMGen + parser = argparse.ArgumentParser() -parser.add_argument("--tokenizer", type=str) -parser.add_argument("--moshi-weights", type=str) -parser.add_argument("--mimi-weights", type=str) +parser.add_argument("--tokenizer", type=str, default=loaders.TEXT_TOKENIZER_V0_1, + help="Name of the text tokenizer file in the given HF repo, or path to a local file.") +parser.add_argument("--moshi-weight", type=str, default=loaders.MOSHIKO_V0_1, + help="Name of the Moshi checkpoint in the given HF repo, or path to a local file.") +parser.add_argument("--mimi-weight", type=str, default=loaders.MIMI_V0_1, + help="Name of the Mimi checkpoint in the given HF repo, or path to a local file.") +parser.add_argument("--hf-repo", type=str, default=loaders.HF_REPO, + help="HF repo to look into, defaults to Kyutai official one.") parser.add_argument("--steps", default=100, type=int) parser.add_argument("--profile", action="store_true") +parser.add_argument("--device", type=str, default='cuda') args = parser.parse_args() @@ -39,52 +43,53 @@ def seed_all(seed): seed_all(42424242) +tokenizer_path = loaders.resolve_model_checkpoint(args.tokenizer, args.hf_repo) +text_tokenizer = loaders.get_text_tokenizer(tokenizer_path) print("loading mimi") -ec = moshi.models.moshi.get_encodec(args.mimi_weights, DEVICE) +mimi_path = loaders.resolve_model_checkpoint(args.mimi_weight, args.hf_repo) +mimi = loaders.get_mimi(mimi_path, args.device) print("mimi loaded") -text_tokenizer = sentencepiece.SentencePieceProcessor(args.tokenizer) print("loading moshi") -lm = moshi.models.moshi.get_lm(args.moshi_weights, DEVICE) -lm.to(torch.bfloat16) +moshi_path = loaders.resolve_model_checkpoint(args.moshi_weight, args.hf_repo) +lm = loaders.get_moshi_lm(moshi_path, args.device) +lm_gen = LMGen(lm) print("lm loaded") -lm_gen = moshi.models.LMGen(lm) - def cb(step, total): print(f"{step:06d} / {total:06d}", end="\r") def streaming_test(bs): - main_audio = [] main_text = [] + frame_size = int(mimi.sample_rate / mimi.frame_rate) + def run_step(): start_time = time.time() # Chunk should contain the pcm data from the user, single channel with a 
sample rate of 24000. - chunk = torch.zeros((bs, 1, 1920), dtype=torch.float, device=DEVICE) - codes = ec.encode(chunk) + chunk = torch.zeros((bs, 1, frame_size), dtype=torch.float, device=args.device) + codes = mimi.encode(chunk) assert codes.shape[-1] == 1 - for c in range(codes.shape[-1]): - be = time.time() - ev = torch.cuda.Event(enable_timing=True) - ev.record() - tokens = lm_gen.step(codes[:, :, c : c + 1]) - if tokens is None: - print("Skipping") - return - evb = torch.cuda.Event(enable_timing=True) - evb.record() - dt_step = time.time() - be - text_tokens = tokens[:, 0, 0] - audio_tokens = tokens[:, 1:, :] - main_pcm = ec.decode(audio_tokens) - # main_pcm is the audio to be played back to the user, here we just append it and store it in - # a file once the loop is finished. - main_audio.append(main_pcm[0]) + be = time.time() + ev = torch.cuda.Event(enable_timing=True) + ev.record() + tokens = lm_gen.step(codes[:, :, :1]) + if tokens is None: + print("Skipping") + return + evb = torch.cuda.Event(enable_timing=True) + evb.record() + dt_step = time.time() - be + text_tokens = tokens[:, 0, 0] + audio_tokens = tokens[:, 1:, :] + main_pcm = mimi.decode(audio_tokens) + # main_pcm is the audio to be played back to the user, here we just append it and store it in + # a file once the loop is finished. + main_audio.append(main_pcm[0]) evb.synchronize() dg = ev.elapsed_time(evb) torch.cuda.synchronize() @@ -109,17 +114,17 @@ def run_step(): run_step() print() prof.export_chrome_trace("trace.json") - main_audio = torch.cat(main_audio, dim=-1) - print(main_audio.shape) + main_audio_th = torch.cat(main_audio, dim=-1) + print(main_audio_th.shape) print("generated text:") print("".join(main_text)) sphn.write_wav( - "gen_main.wav", main_audio[0].cpu().numpy().astype(np.float32), SAMPLE_RATE + "gen_main.wav", main_audio_th[0].cpu().numpy().astype(np.float32), mimi.sample_rate ) print("streaming test") bs = 1 with torch.no_grad(): - with ec.streaming(bs), lm_gen.streaming(bs): + with mimi.streaming(bs), lm_gen.streaming(bs): streaming_test(bs) diff --git a/scripts/setup.cfg b/scripts/setup.cfg index dc7aa4b..5bccac4 100755 --- a/scripts/setup.cfg +++ b/scripts/setup.cfg @@ -3,3 +3,4 @@ max-line-length = 120 [flake8] max-line-length = 120 +ignore = E203,E704
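The benchmark's run_step above times each step both with time.time() for wall-clock time and with CUDA events for device time. A minimal sketch of the CUDA-event pattern in isolation (requires a CUDA device; the matmul only stands in for lm_gen.step):

    import torch

    assert torch.cuda.is_available()
    x = torch.randn(1024, 1024, device="cuda")

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    y = x @ x                  # the work being measured
    end.record()
    end.synchronize()          # wait for the GPU to reach the end event
    print(f"device time: {start.elapsed_time(end):.2f} ms")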