dumb dockerfile test, add preliminary docs

mudler · Dec 7, 2023 · 0f5b01b · 0f5b01b
1 parent 43fb8ef
commit 0f5b01b
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 3 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -177,9 +177,6 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 	PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
     ; fi
-RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
-    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 	PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
     ; fi
@@ -192,6 +189,10 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 	PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
     ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    pwd; \
+	PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
+    ; fi
 
 # Define the health check command
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \

diff --git a/docs/content/features/text-to-audio.md b/docs/content/features/text-to-audio.md
@@ -52,6 +52,20 @@ Note:
 - The model name is case sensitive.
 - LocalAI must be compiled with the `GO_TAGS=tts` flag.
 
+LocalAI also has experimental support for `transformers-musicgen`, which generates short musical compositions. Currently, it is exposed through the same `/tts` request used for text-to-speech:
+
+```
+curl --request POST \
+  --url http://localhost:8080/tts \
+  --header 'Content-Type: application/json' \
+  --data '{
+    "backend": "transformers-musicgen",
+    "model": "facebook/musicgen-medium",
+    "input": "Cello Rave"
+}' | aplay
+```
+
+Future versions of LocalAI will expose additional control over audio generation beyond the text prompt.
+
 #### Configuration
 
 Audio models can be configured via `YAML` files. This allows configuring specific settings for each backend. For instance, a backend might specify a voice or support voice cloning, which must be specified in the configuration file.