Skip to content

Commit

Permalink
Update documents
Browse files Browse the repository at this point in the history
  • Loading branch information
mochi-neko committed Jul 11, 2023
1 parent 5d00f6e commit 6a81a0f
Show file tree
Hide file tree
Showing 10 changed files with 42 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
"name": "Mochineko.VoiceActivityDetection",
"rootNamespace": "",
"references": [
"GUID:cc4a20372913d4ba9bc6f1f4d3837ac3",
"GUID:3ed995982eec04b2e9d304b5b6242945",
"GUID:9dc703687ff9843c78c2eab70b2e26ff",
"GUID:560b04d1a97f54a4e82edc0cbbb69285",
"GUID:f51ebe6a0ceec4240a699833d6309b23",
Expand Down
4 changes: 4 additions & 0 deletions Assets/Mochineko/VoiceActivityDetection/VoiceSegment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ public float Volume()
return Mathf.Sqrt(sum / length); // Root mean square
}

/// <summary>
/// Creates a deep copy of this voice segment's data.
/// </summary>
/// <returns></returns>
public VoiceSegment Copy()
{
var copy = new float[length];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
namespace Mochineko.VoiceActivityDetection
{
/// <summary>
/// Queue of voice segment activity.
/// A queue of <see cref="VoiceSegmentActivity"/>.
/// </summary>
internal sealed class VoiceSegmentActivityQueue
{
private readonly ConcurrentQueue<VoiceSegmentActivity> queue = new();
private readonly float maxQueueingTimeSeconds;

public float TotalTimeSeconds { get; private set; }

public VoiceSegmentActivityQueue(float maxQueueingTimeSeconds)
Expand Down
3 changes: 3 additions & 0 deletions Assets/Mochineko/VoiceActivityDetection/VoiceSegmentQueue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

namespace Mochineko.VoiceActivityDetection
{
/// <summary>
/// A queue of <see cref="VoiceSegment"/>.
/// </summary>
internal sealed class VoiceSegmentQueue
{
private readonly ConcurrentQueue<VoiceSegment> queue = new();
Expand Down
2 changes: 1 addition & 1 deletion Assets/Mochineko/VoiceActivityDetection/WaveVoiceBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ async UniTask IVoiceBuffer.OnInactiveAsync(CancellationToken cancellationToken)

await writer.FlushAsync(cancellationToken);

// NOTE: Please dispose stream by receiver.
// NOTE: The receiver is responsible for disposing the copied stream.
var copiedStream = new MemoryStream();
stream.Seek(offset: 0, SeekOrigin.Begin);
await stream.CopyToAsync(copiedStream, cancellationToken);
Expand Down
3 changes: 1 addition & 2 deletions Assets/Mochineko/VoiceActivityDetection/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "com.mochineko.voice-activity-detection",
"version": "0.1.0",
"version": "0.2.0",
"displayName": "Voice Activity Detection",
"description": "A voice activity detection library for Unity.",
"unity": "2022.3",
Expand All @@ -15,7 +15,6 @@
"dependencies": {
"com.cysharp.unitask": "https://github.com/Cysharp/UniTask.git?path=src/UniTask/Assets/Plugins/UniTask",
"com.neuecc.unirx": "https://github.com/neuecc/UniRx.git?path=Assets/Plugins/UniRx/Scripts",
"com.mochineko.simple-audio-codec-unity": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/Mochineko/SimpleAudioCodec#0.2.0",
"com.naudio.core": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/NAudio/NAudio.Core#0.2.0",
"com.unity.logging": "1.0.11"
}
Expand Down
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.2.0] - 2023-07-11

### Added
- Add an echo sample to test VAD by listening to the recorded microphone audio.
- Add composite buffer to combine multiple buffers.
- Add validations of `UnityEngine.Microphone`.

### Changed
- Improve VAD logic and parameters.
- Improve recording of `UnityEngine.Microphone` for the moment just before voice becomes active.

### Fixed
- Fix data duplication of `UnityEngine.Microphone` when the recording position has not changed.

## [0.1.0] - 2023-07-05

### Added
Expand Down
1 change: 0 additions & 1 deletion Packages/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"com.boundfoxstudios.fluentassertions": "https://github.com/BoundfoxStudios/fluentassertions-unity.git#upm",
"com.cysharp.unitask": "https://github.com/Cysharp/UniTask.git?path=src/UniTask/Assets/Plugins/UniTask",
"com.neuecc.unirx": "https://github.com/neuecc/UniRx.git?path=Assets/Plugins/UniRx/Scripts",
"com.mochineko.simple-audio-codec-unity": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/Mochineko/SimpleAudioCodec#0.2.0",
"com.naudio.core": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/NAudio/NAudio.Core#0.2.0",
"com.mochineko.relent": "https://github.com/mochi-neko/Relent.git?path=/Assets/Mochineko/Relent#0.2.0",
"com.mochineko.whisper-api": "https://github.com/mochi-neko/Whisper-API-unity.git?path=/Assets/Mochineko/WhisperAPI#1.0.0",
Expand Down
7 changes: 0 additions & 7 deletions Packages/packages-lock.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@
"dependencies": {},
"hash": "5fd87387912763429e4c8008157ed6a654c3093c"
},
"com.mochineko.simple-audio-codec-unity": {
"version": "https://github.com/mochi-neko/simple-audio-codec-unity.git?path=/Assets/Mochineko/SimpleAudioCodec#0.2.0",
"depth": 0,
"source": "git",
"dependencies": {},
"hash": "fb76f048a78ef9fa4e0d00df6f184d1b7a64042b"
},
"com.mochineko.whisper-api": {
"version": "https://github.com/mochi-neko/Whisper-API-unity.git?path=/Assets/Mochineko/WhisperAPI#1.0.0",
"depth": 0,
Expand Down
26 changes: 17 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# voice-activity-detection-unity
A voice activity detection library for Unity.
A voice activity detection (VAD) library for Unity.

## Features

Records voice data from any source (`IVoiceSource`, e.g. recording by `UnityEngine.Microphone`),
detects voice activity by any logic,
and provides voice data to any buffer (`IVoiceBuffer`, e.g. buffering to a WAV file) when voice is active.

You can customize voice sources, voice buffers,
and voice activity detection logic to suit your use case.

- Sources
- [x] `UnityEngine.Microphone`
- [x] `UnityEngine.Microphone` -> [UnityMicrophoneSource](./Assets/Mochineko/VoiceActivityDetection/UnityMicrophoneSource.cs)
- [ ] `AudioSource`
- [ ] Native microphone
- Buffers
- [x] Null (Detection only)
- [x] Wave file (by [simple-audio-codec-unity](https://github.com/mochi-neko/simple-audio-codec-unity) / [NAudio](https://github.com/naudio/NAudio))
- [ ] AudioClip
- [x] Null (Detection only) -> [NullVoiceBuffer](./Assets/Mochineko/VoiceActivityDetection/NullVoiceBuffer.cs)
- [x] Wave file (by [NAudio](https://github.com/naudio/NAudio)) -> [WaveFileVoiceBuffer](./Assets/Mochineko/VoiceActivityDetection/WaveVoiceBuffer.cs)
- [x] AudioClip -> [AudioClipBuffer](./Assets/Mochineko/VoiceActivityDetection/AudioClipBuffer.cs)
- Voice activity detection logics
- [x] Queuing-based simple VAD logic
- [x] Queueing-based simple VAD logic -> [QueueingVoiceActivityDetector](./Assets/Mochineko/VoiceActivityDetection/QueueingVoiceActivityDetector.cs)

## How to import with Unity Package Manager

Expand All @@ -21,16 +28,17 @@ Add following dependencies to your `/Packages/manifest.json`.
```json
{
"dependencies": {
"com.mochineko.voice-activity-detection": "https://github.com/mochi-neko/voice-activity-detection-unity?path=/Assets/Mochineko/VoiceActivityDetection#0.1.0",
"com.mochineko.voice-activity-detection": "https://github.com/mochi-neko/voice-activity-detection-unity?path=/Assets/Mochineko/VoiceActivityDetection#0.2.0",
...
}
}
```

## Samples

- [A simple implementation of VAD as component](./Assets/Mochineko/VoiceActivityDetection.Samples/VADSample.cs)
- [A sample implementation of VAD with OpenAI/Whisper API](./Assets/Mochineko/VoiceActivityDetection.Samples/VADToWhisperSample.cs)
- [VAD as component](./Assets/Mochineko/VoiceActivityDetection.Samples/VADSample.cs)
- [VAD with echo](./Assets/Mochineko/VoiceActivityDetection.Samples/VADAudioClipEchoSample.cs)
- [VAD with OpenAI/Whisper API transcription](./Assets/Mochineko/VoiceActivityDetection.Samples/VADToWhisperSample.cs)

See also [Samples](./Assets/Mochineko/VoiceActivityDetection.Samples).

Expand Down

0 comments on commit 6a81a0f

Please sign in to comment.