Skip to content

Commit

Permalink
Merge pull request #2 from marty1885/api-feature-fixes
Browse files Browse the repository at this point in the history
Batch of small fixes for the web API
  • Loading branch information
marty1885 committed Mar 3, 2024
2 parents 4a25cee + aac1535 commit 0f5324c
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 15 deletions.
29 changes: 16 additions & 13 deletions paroli-server/api.cpp
Expand Up @@ -4,6 +4,7 @@
#include <trantor/net/EventLoopThreadPool.h>

#include <span>
#include <bit>

#include "piper.hpp"
#include "OggOpusEncoder.hpp"
Expand All @@ -15,7 +16,6 @@ extern piper::PiperConfig piperConfig;
extern piper::Voice voice;
extern std::string authToken;
trantor::EventLoopThreadPool synthesizerThreadPool(3, "synehesizer thread pool");
std::atomic<size_t> synthesizerThreadIndex = 0;

template<typename Func>
requires std::is_invocable_v<Func, const std::span<const short>>
Expand Down Expand Up @@ -163,7 +163,7 @@ SynthesisApiParams parseSynthesisApiParams(const std::string_view json_txt)
return res;
}

std::vector<short> resample(std::span<const short> input, size_t orig_sr, size_t out_sr, int channels)
static std::vector<short> resample(std::span<const short> input, size_t orig_sr, size_t out_sr, int channels)
{
soxr_io_spec_t io_spec = soxr_io_spec(SOXR_INT16_I, SOXR_INT16_I);
soxr_quality_spec_t q_spec = soxr_quality_spec(SOXR_MQ, 0);
Expand Down Expand Up @@ -237,10 +237,7 @@ void v1ws::handleNewConnection(const HttpRequestPtr& req, const WebSocketConnect

void v1ws::handleNewMessage(const WebSocketConnectionPtr& wsConnPtr, std::string&& message, const WebSocketMessageType& type)
{
// yeah yeah this can be raced even with atomic. I don't care not be deal
int index = synthesizerThreadIndex++ % synthesizerThreadPool.size();
index = index % synthesizerThreadPool.size();
synthesizerThreadPool.getLoop(index)->queueInLoop(async_func([=, this]() mutable -> Task<> {
synthesizerThreadPool.getNextLoop()->queueInLoop(async_func([=, this]() mutable -> Task<> {
co_await handleNewMessageAsync(wsConnPtr, std::move(message), type);
}));
}
Expand All @@ -254,7 +251,10 @@ Task<> v1ws::handleNewMessageAsync(WebSocketConnectionPtr wsConnPtr, std::string
params = parseSynthesisApiParams(message);
}
catch (const std::exception& e) {
wsConnPtr->send("ERROR: " + std::string(e.what()));
nlohmann::json resp;
resp["status"] = "failed";
resp["message"] = std::string(e.what());
wsConnPtr->send(resp.dump());
co_return;
}
bool send_opus = params.audio_format.value_or("opus") == "opus";
Expand All @@ -269,12 +269,17 @@ Task<> v1ws::handleNewMessageAsync(WebSocketConnectionPtr wsConnPtr, std::string
wsConnPtr->send((char*)opus.data(), opus.size(), WebSocketMessageType::Binary);
return;
}
// TODO: Fix Big Endian support

if constexpr (std::endian::native == std::endian::big) {
// Convert to little endian
for(int16_t& sample : pcm)
sample = (sample >> 8) | (sample << 8);
}
wsConnPtr->send((char*)pcm.data(), pcm.size() * sizeof(int16_t), WebSocketMessageType::Binary);
}, params.length_scale, params.noise_scale, params.noise_w);

if(!ok) {
wsConnPtr->send("ERROR: Failed to synthesise");
wsConnPtr->send(R"({"status":"failed", "message":"failed to synthesis"})");
co_return;

}
Expand All @@ -283,6 +288,7 @@ Task<> v1ws::handleNewMessageAsync(WebSocketConnectionPtr wsConnPtr, std::string
if(opus.empty() == false)
wsConnPtr->send((char*)opus.data(), opus.size(), WebSocketMessageType::Binary);
}
wsConnPtr->send(R"({"status":"ok", "message":"finished"})");
}

Task<HttpResponsePtr> v1::synthesise(const HttpRequestPtr req)
Expand All @@ -303,10 +309,7 @@ Task<HttpResponsePtr> v1::synthesise(const HttpRequestPtr req)
co_return makeBadRequestResponse("Invalid Authorization");
}

int id = synthesizerThreadIndex++;
if(synthesizerThreadIndex >= synthesizerThreadPool.size())
synthesizerThreadIndex = 0;
auto loop = synthesizerThreadPool.getLoop(id % synthesizerThreadPool.size());
auto loop = synthesizerThreadPool.getNextLoop();
co_await switchThreadCoro(loop);


Expand Down
6 changes: 4 additions & 2 deletions paroli-server/docs/web_api.md
Expand Up @@ -84,20 +84,22 @@ example response:
* Method: GET
* Parameters: None

This endpoint works exactly like the synthesise API above. But audio is streamed in chunk as soon as it can - reducing latency. Message format is the same as the synthesise API body.
This endpoint works exactly like the synthesise API above. But audio is streamed in chunk as soon as it can - reducing latency, as binary blobs. Message format is the same as the synthesise API body. A text message is sent once an error is encountered or synthesis of current text is finished.

For example, the following message causes OPUS audio to be streamed back as binray messages.

```bash
wscat -c 'ws://example.com:8848/api/v1/stream'
> {"text": "Hello! how can I help you"}
< [OPUS audio blob]
< [OPUS audio blob]
< {"status":"ok", "message":"finished"}
```

The server will reply error as text

```bash
wscat -c 'ws://example.com:8848/api/v1/stream'
> {"hello": "blablabla"}
< ERROR: Missing 'text' field
< {"status":"failed", "message":"Missing 'text' field"}
```
7 changes: 7 additions & 0 deletions paroli-server/web-content/index.js
Expand Up @@ -64,6 +64,13 @@ function reconnectWS(connect_fn) {
connect_fn && connect_fn()
});
ws.addEventListener('message', function (event) {

if(typeof event.data == 'string') {
let msg = JSON.parse(event.data);
if(msg["status"] == "ok")
return;
console.error(msg);
}
var data = new Uint8Array(event.data);
if(data.length == 0) return; // ignore empty (pong) messages
player.feed(data);
Expand Down

0 comments on commit 0f5324c

Please sign in to comment.