// PicovoiceManager.cs
//
// Copyright 2021-2023 Picovoice Inc.
//
// You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
// file accompanying this source.
//
// Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
// an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
using System;
using UnityEngine;
namespace Pv.Unity
{
/// <summary>
/// Manager for creating an instance of Picovoice and routing microphone audio
/// (captured by <see cref="VoiceProcessor"/>) into it.
/// </summary>
public class PicovoiceManager
{
    // Underlying Picovoice engine; set to null once resources are released via Dispose().
    private Picovoice _picovoice;

    // Optional user callback for errors encountered while processing audio frames.
    private Action<PicovoiceException> _processErrorCallback;

    /// <summary>
    /// Creates an instance of PicovoiceManager.
    /// </summary>
    /// <param name="accessKey">AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).</param>
    /// <param name="keywordPath">Absolute path to Porcupine's keyword model file.</param>
    /// <param name="wakeWordCallback">
    /// User-defined callback invoked upon detection of the wake phrase.
    /// The callback accepts no input arguments.
    /// </param>
    /// <param name="contextPath">
    /// Absolute path to file containing context parameters. A context represents the set of
    /// expressions (spoken commands), intents, and intent arguments (slots) within a domain of interest.
    /// </param>
    /// <param name="inferenceCallback">
    /// User-defined callback invoked upon completion of intent inference. The callback
    /// accepts a single input argument of type `Map&lt;String, dynamic&gt;` that is populated with the following items:
    /// (1) IsUnderstood: whether Rhino understood what it heard based on the context
    /// (2) Intent: if isUnderstood, name of intent that were inferred
    /// (3) Slots: if isUnderstood, dictionary of slot keys and values that were inferred
    /// </param>
    /// <param name="porcupineModelPath">Absolute path to the file containing Porcupine's model parameters.</param>
    /// <param name="porcupineSensitivity">
    /// Wake word detection sensitivity. It should be a number within [0, 1]. A higher
    /// sensitivity results in fewer misses at the cost of increasing the false alarm rate.
    /// </param>
    /// <param name="rhinoModelPath">Absolute path to the file containing Rhino's model parameters.</param>
    /// <param name="rhinoSensitivity">
    /// Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value
    /// results in fewer misses at the cost of (potentially) increasing the erroneous inference rate.
    /// </param>
    /// <param name="endpointDurationSec">
    /// Endpoint duration in seconds. An endpoint is a chunk of silence at the end of an
    /// utterance that marks the end of spoken command. It should be a positive number within [0.5, 5]. A lower endpoint
    /// duration reduces delay and improves responsiveness. A higher endpoint duration assures Rhino doesn't return inference
    /// pre-emptively in case the user pauses before finishing the request.
    /// </param>
    /// <param name="requireEndpoint">
    /// If set to `true`, Rhino requires an endpoint (a chunk of silence) after the spoken command.
    /// If set to `false`, Rhino tries to detect silence, but if it cannot, it still will provide inference regardless. Set
    /// to `false` only if operating in an environment with overlapping speech (e.g. people talking in the background).
    /// </param>
    /// <param name="processErrorCallback">Reports errors that are encountered while the engine is processing audio.</param>
    /// <returns>An instance of PicovoiceManager.</returns>
    public static PicovoiceManager Create(
        string accessKey,
        string keywordPath,
        Action wakeWordCallback,
        string contextPath,
        Action<Inference> inferenceCallback,
        string porcupineModelPath = null,
        float porcupineSensitivity = 0.5f,
        string rhinoModelPath = null,
        float rhinoSensitivity = 0.5f,
        float endpointDurationSec = 1.0f,
        bool requireEndpoint = false,
        Action<PicovoiceException> processErrorCallback = null)
    {
        Picovoice picovoice = Picovoice.Create(
            accessKey,
            keywordPath,
            wakeWordCallback,
            contextPath,
            inferenceCallback,
            porcupineModelPath,
            porcupineSensitivity,
            rhinoModelPath,
            rhinoSensitivity,
            endpointDurationSec,
            requireEndpoint);

        return new PicovoiceManager(
            picovoice: picovoice,
            processErrorCallback: processErrorCallback);
    }

    /// <summary>
    /// PicovoiceManager constructor
    /// </summary>
    /// <param name="picovoice">An instance of the Picovoice engine.</param>
    /// <param name="processErrorCallback">Optional callback for reporting processing errors.</param>
    private PicovoiceManager(
        Picovoice picovoice,
        Action<PicovoiceException> processErrorCallback)
    {
        _picovoice = picovoice;
        _processErrorCallback = processErrorCallback;
    }

    /// <summary>
    /// Action to catch audio frames as voice processor produces them
    /// </summary>
    /// <param name="frame">Frame of audio</param>
    private void OnFrameCaptured(short[] frame)
    {
        if (_picovoice == null)
        {
            // BUG FIX: the original created this exception but never reported it and
            // then fell through to `_picovoice.Process(frame)`, causing a
            // NullReferenceException. Report the error and bail out instead.
            PicovoiceException ex = new PicovoiceInvalidStateException(
                "Cannot capture frames - resources have been released.");
            if (_processErrorCallback != null)
                _processErrorCallback(ex);
            else
                Debug.LogError(ex.ToString());
            return;
        }

        try
        {
            _picovoice.Process(frame);
        }
        catch (PicovoiceException ex)
        {
            // Route processing errors to the user callback when provided;
            // otherwise surface them in the Unity console.
            if (_processErrorCallback != null)
                _processErrorCallback(ex);
            else
                Debug.LogError(ex.ToString());
        }
    }

    /// <summary>
    /// Checks to see whether PicovoiceManager is capturing audio or not
    /// </summary>
    /// <returns>whether PicovoiceManager is capturing audio or not</returns>
    public bool IsRecording => VoiceProcessor.Instance.IsRecording;

    /// <summary>
    /// Checks to see whether there are any audio capture devices available
    /// </summary>
    /// <returns>whether there are any audio capture devices available</returns>
    public bool IsAudioDeviceAvailable()
    {
        VoiceProcessor.Instance.UpdateDevices();
        return VoiceProcessor.Instance.CurrentDeviceIndex >= 0;
    }

    /// <summary>
    /// Starts audio capture and Picovoice processing
    /// </summary>
    /// <exception cref="PicovoiceInvalidStateException">Thrown if resources have already been released.</exception>
    public void Start()
    {
        if (_picovoice == null)
        {
            throw new PicovoiceInvalidStateException("Cannot start - resources have been released.");
        }

        VoiceProcessor.Instance.AddFrameListener(OnFrameCaptured);
        VoiceProcessor.Instance.StartRecording(_picovoice.FrameLength, _picovoice.SampleRate);
    }

    /// <summary>
    /// Stops audio capture and Picovoice processing
    /// </summary>
    /// <exception cref="PicovoiceInvalidStateException">Thrown if resources have already been released.</exception>
    public void Stop()
    {
        if (_picovoice == null)
        {
            throw new PicovoiceInvalidStateException("Cannot stop - resources have been released.");
        }

        VoiceProcessor.Instance.RemoveFrameListener(OnFrameCaptured);
        // Only stop the shared recorder once no other consumers remain subscribed.
        if (VoiceProcessor.Instance.NumFrameListeners == 0)
        {
            VoiceProcessor.Instance.StopRecording();
        }
    }

    /// <summary>
    /// Resets the internal state of Picovoice. It should be called before processing a new stream of audio
    /// or when process was stopped while processing a stream of audio.
    /// </summary>
    /// <exception cref="PicovoiceInvalidStateException">Thrown if resources have already been released.</exception>
    public void Reset()
    {
        if (_picovoice == null)
        {
            throw new PicovoiceInvalidStateException("Cannot reset - resources have been released.");
        }

        _picovoice.Reset();
    }

    /// <summary>
    /// Frees memory that was allocated for Picovoice
    /// </summary>
    public void Dispose()
    {
        if (_picovoice != null)
        {
            _picovoice.Dispose();
            _picovoice = null;
        }
    }
}
}