Update documents

mochi-neko · Jul 11, 2023 · 6a81a0f · 6a81a0f
1 parent 5d00f6e
commit 6a81a0f
Show file tree

Hide file tree

Showing 10 changed files with 42 additions and 24 deletions.
diff --git a/Assets/Mochineko/VoiceActivityDetection/Mochineko.VoiceActivityDetection.asmdef b/Assets/Mochineko/VoiceActivityDetection/Mochineko.VoiceActivityDetection.asmdef
@@ -2,8 +2,6 @@
     "name": "Mochineko.VoiceActivityDetection",
     "rootNamespace": "",
     "references": [
-        "GUID:cc4a20372913d4ba9bc6f1f4d3837ac3",
-        "GUID:3ed995982eec04b2e9d304b5b6242945",
         "GUID:9dc703687ff9843c78c2eab70b2e26ff",
         "GUID:560b04d1a97f54a4e82edc0cbbb69285",
         "GUID:f51ebe6a0ceec4240a699833d6309b23",

diff --git a/Assets/Mochineko/VoiceActivityDetection/VoiceSegment.cs b/Assets/Mochineko/VoiceActivityDetection/VoiceSegment.cs
@@ -53,6 +53,10 @@ public float Volume()
             return Mathf.Sqrt(sum / length); // Root mean square
         }
 
+        /// <summary>
+        /// Creates a deep copy of this voice segment.
+        /// </summary>
+        /// <returns>A new <see cref="VoiceSegment"/> holding a copy of this segment's data.</returns>
         public VoiceSegment Copy()
         {
             var copy = new float[length];

diff --git a/Assets/Mochineko/VoiceActivityDetection/VoiceSegmentActivityQueue.cs b/Assets/Mochineko/VoiceActivityDetection/VoiceSegmentActivityQueue.cs
@@ -5,13 +5,13 @@
 namespace Mochineko.VoiceActivityDetection
 {
     /// <summary>
-    /// Queue of voice segment activity.
+    /// A queue of <see cref="VoiceSegmentActivity"/>.
     /// </summary>
     internal sealed class VoiceSegmentActivityQueue
     {
         private readonly ConcurrentQueue<VoiceSegmentActivity> queue = new();
         private readonly float maxQueueingTimeSeconds;
-
+        
         public float TotalTimeSeconds { get; private set; }
 
         public VoiceSegmentActivityQueue(float maxQueueingTimeSeconds)

diff --git a/Assets/Mochineko/VoiceActivityDetection/VoiceSegmentQueue.cs b/Assets/Mochineko/VoiceActivityDetection/VoiceSegmentQueue.cs
@@ -3,6 +3,9 @@
 
 namespace Mochineko.VoiceActivityDetection
 {
+    /// <summary>
+    /// A queue of <see cref="VoiceSegment"/>.
+    /// </summary>
     internal sealed class VoiceSegmentQueue
     {
         private readonly ConcurrentQueue<VoiceSegment> queue = new();

diff --git a/Assets/Mochineko/VoiceActivityDetection/WaveVoiceBuffer.cs b/Assets/Mochineko/VoiceActivityDetection/WaveVoiceBuffer.cs
@@ -101,7 +101,7 @@ async UniTask IVoiceBuffer.OnInactiveAsync(CancellationToken cancellationToken)
 
             await writer.FlushAsync(cancellationToken);
 
-            // NOTE: Please dispose stream by receiver.
+            // NOTE: The receiver is responsible for disposing the copied stream.
             var copiedStream = new MemoryStream();
             stream.Seek(offset: 0, SeekOrigin.Begin);
             await stream.CopyToAsync(copiedStream, cancellationToken);

diff --git a/Assets/Mochineko/VoiceActivityDetection/package.json b/Assets/Mochineko/VoiceActivityDetection/package.json
@@ -1,6 +1,6 @@
 {
   "name": "com.mochineko.voice-activity-detection",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "displayName": "Voice Activity Detection",
   "description": "A voice activity detection library for Unity.",
   "unity": "2022.3",
@@ -15,7 +15,6 @@
   "dependencies": {
     "com.cysharp.unitask": "https://github.com/Cysharp/UniTask.git?path=src/UniTask/Assets/Plugins/UniTask",
     "com.neuecc.unirx": "https://github.com/neuecc/UniRx.git?path=Assets/Plugins/UniRx/Scripts",
-    "com.mochineko.simple-audio-codec-unity": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/Mochineko/SimpleAudioCodec#0.2.0",
     "com.naudio.core": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/NAudio/NAudio.Core#0.2.0",
     "com.unity.logging": "1.0.11"
   }

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.0] - 2023-07-11
+
+### Added
+- Add echo sample to test VAD with hearing microphone audio.
+- Add composite buffer to combine multiple buffers.
+- Add validations of `UnityEngine.Microphone`.
+
+### Changed
+- Improve VAD logic and parameters.
+- Improve recording of `UnityEngine.Microphone` just before voice activation.
+
+### Fixed
+- Fix data duplication of `UnityEngine.Microphone` when recording position is not changed.
+
 ## [0.1.0] - 2023-07-05
 
 ### Added

diff --git a/Packages/manifest.json b/Packages/manifest.json
@@ -3,7 +3,6 @@
     "com.boundfoxstudios.fluentassertions": "https://github.com/BoundfoxStudios/fluentassertions-unity.git#upm",
     "com.cysharp.unitask": "https://github.com/Cysharp/UniTask.git?path=src/UniTask/Assets/Plugins/UniTask",
     "com.neuecc.unirx": "https://github.com/neuecc/UniRx.git?path=Assets/Plugins/UniRx/Scripts",
-    "com.mochineko.simple-audio-codec-unity": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/Mochineko/SimpleAudioCodec#0.2.0",
     "com.naudio.core": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/NAudio/NAudio.Core#0.2.0",
     "com.mochineko.relent": "https://github.com/mochi-neko/Relent.git?path=/Assets/Mochineko/Relent#0.2.0",
     "com.mochineko.whisper-api": "https://github.com/mochi-neko/Whisper-API-unity.git?path=/Assets/Mochineko/WhisperAPI#1.0.0",

diff --git a/Packages/packages-lock.json b/Packages/packages-lock.json
@@ -21,13 +21,6 @@
       "dependencies": {},
       "hash": "5fd87387912763429e4c8008157ed6a654c3093c"
     },
-    "com.mochineko.simple-audio-codec-unity": {
-      "version": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/Mochineko/SimpleAudioCodec#0.2.0",
-      "depth": 0,
-      "source": "git",
-      "dependencies": {},
-      "hash": "fb76f048a78ef9fa4e0d00df6f184d1b7a64042b"
-    },
     "com.mochineko.whisper-api": {
       "version": "https://github.com/mochi-neko/Whisper-API-unity.git?path=/Assets/Mochineko/WhisperAPI#1.0.0",
       "depth": 0,

diff --git a/README.md b/README.md
@@ -1,18 +1,25 @@
 # voice-activity-detection-unity
-A voice activity detection library for Unity.
+A voice activity detection (VAD) library for Unity.
 
 ## Features
 
+Records voice data from any source (`IVoiceSource`, e.g. recording by `UnityEngine.Microphone`),
+ detects voice activity by any logic,
+ and provides voice data to any buffer (`IVoiceBuffer`, e.g. buffering to WAV file) when voice is active.
+
+You can customize voice sources, voice buffers,
+ and voice activity detection logic to suit your use case.
+
 - Sources
-  - [x] `UnityEngine.Microphone`
+  - [x] `UnityEngine.Microphone` -> [UnityMicrophoneSource](./Assets/Mochineko/VoiceActivityDetection/UnityMicrophoneSource.cs)
   - [ ] `AudioSource`
   - [ ] Native microphone
 - Buffers
-  - [x] Null (Detection only)
-  - [x] Wave file (by [simple-audio-codec-unity](https://github.com/mochi-neko/simple-audio-codec-unity) / [NAudio](https://github.com/naudio/NAudio))
-  - [ ] AudioClip
+  - [x] Null (Detection only) -> [NullVoiceBuffer](./Assets/Mochineko/VoiceActivityDetection/NullVoiceBuffer.cs)
+  - [x] Wave file (by [NAudio](https://github.com/naudio/NAudio)) -> [WaveVoiceBuffer](./Assets/Mochineko/VoiceActivityDetection/WaveVoiceBuffer.cs)
+  - [x] AudioClip -> [AudioClipBuffer](./Assets/Mochineko/VoiceActivityDetection/AudioClipBuffer.cs)
 - Voice activity detection logics
-  - [x] Queuing-based simple VAD logic
+  - [x] Queueing-based simple VAD logic -> [QueueingVoiceActivityDetector](./Assets/Mochineko/VoiceActivityDetection/QueueingVoiceActivityDetector.cs)
 
 ## How to import with Unity Package Manager
 
@@ -21,16 +28,17 @@ Add following dependencies to your `/Packages/manifest.json`.
 ```json
 {
     "dependencies": {
-        "com.mochineko.voice-activity-detection": "https://github.com/mochi-neko/voice-activity-detection-unity?path=/Assets/Mochineko/VoiceActivityDetection#0.1.0",
+        "com.mochineko.voice-activity-detection": "https://github.com/mochi-neko/voice-activity-detection-unity?path=/Assets/Mochineko/VoiceActivityDetection#0.2.0",
         ...
     }
 }
 ```
 
 ## Samples
 
-- [A simple implementation of VAD as component](./Assets/Mochineko/VoiceActivityDetection.Samples/VADSample.cs)
-- [A sample implementation of VAD with OpenAI/Whisper API](./Assets/Mochineko/VoiceActivityDetection.Samples/VADToWhisperSample.cs)
+- [VAD as component](./Assets/Mochineko/VoiceActivityDetection.Samples/VADSample.cs)
+- [VAD with echo](./Assets/Mochineko/VoiceActivityDetection.Samples/VADAudioClipEchoSample.cs)
+- [VAD with OpenAI/Whisper API transcription](./Assets/Mochineko/VoiceActivityDetection.Samples/VADToWhisperSample.cs)
 
 See also [Samples](./Assets/Mochineko/VoiceActivityDetection.Samples).