Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

generated an exception: Failed to decode audio While running asr_prep_json.py on custom dataset #5461

Open
3sakshij opened this issue Mar 16, 2024 · 0 comments

Comments

@3sakshij
Copy link

What is your question?

While running the asr_prep_json.py script in fairseq/examples/datasets, I get a "Failed to decode audio" exception for a few of the .wav files. I don't understand why this error occurs. I searched the existing issues and the torchaudio documentation but could not find an answer. Kindly let me know how to fix this.

Code

# Python 2/3 compatibility switches. This must be the `__future__` pseudo-module
# (with double underscores); a plain `future` refers to an unrelated
# third-party package and is not a future statement.
from __future__ import absolute_import, division, print_function, unicode_literals

import argparse
import concurrent.futures
import json
import multiprocessing
import os
from collections import namedtuple
from itertools import chain

import sentencepiece as spm
from fairseq.data import Dictionary

# Length of one millisecond expressed in seconds. process_sample divides a
# duration in seconds by this factor to obtain the duration in milliseconds.
MILLISECONDS_TO_SECONDS = 0.001

def process_sample(aud_path, lable, utt_id, sp, tgt_dict):
    """Build the JSON manifest entry for a single utterance.

    Args:
        aud_path: Path of the audio file to inspect.
        lable: Transcript text for the utterance (the misspelled parameter
            name is kept so existing callers keep working).
        utt_id: Utterance id, used as the key of the returned mapping.
        sp: Tokenizer object providing ``EncodeAsPieces(text)``.
        tgt_dict: Dictionary object providing
            ``encode_line(text, append_eos=False)``.

    Returns:
        ``{utt_id: {"input": {...}, "output": {...}}}`` where ``input`` holds
        ``length_ms`` and ``path`` and ``output`` holds ``text``, ``token``
        and ``tokenid``.

    Raises:
        Whatever ``torchaudio.info`` raises when the file cannot be read or
        decoded; the caller is expected to handle that per sample.
    """
    # Imported lazily so the module can be imported without torchaudio.
    import torchaudio

    # The original called an undefined name `t(...)`; the attributes read
    # below are those of the metadata object returned by torchaudio.info.
    info = torchaudio.info(aud_path)

    # NOTE(review): torchaudio documents `num_frames` as a per-channel frame
    # count, so the duration is num_frames / sample_rate. Dividing by
    # `num_channels` as well (as the original did) would under-report the
    # length of multichannel files. Confirm against the installed version.
    duration_s = info.num_frames / info.sample_rate
    audio_entry = {
        "length_ms": int(duration_s / MILLISECONDS_TO_SECONDS),
        "path": aud_path,
    }

    token = " ".join(sp.EncodeAsPieces(lable))
    ids = tgt_dict.encode_line(token, append_eos=False)
    target_entry = {
        "text": lable,
        "token": token,
        # Each id is converted via .tolist() before being rendered as text.
        "tokenid": ", ".join(str(t.tolist()) for t in ids),
    }
    return {utt_id: {"input": audio_entry, "output": target_entry}}

def main():
    """Create the ASR JSON manifest from audio directories and a label file.

    Command-line arguments:
        --audio-dirs: One or more directories that are walked recursively.
        --labels: Text file with one ``<utt_id> <transcript>`` pair per line.
        --spm-model: SentencePiece model used to tokenize transcripts.
        --dictionary: File the fairseq dictionary is loaded from.
        --audio-format: Audio file extension to collect (``flac`` or ``wav``).
        --output: File the resulting JSON is written to.

    Audio files whose stem is not present in the label file are skipped.
    Samples whose processing raises are reported on stdout and left out of
    the manifest; the remaining samples are still written.

    Raises:
        ValueError: If a non-blank label line has no space separating the
            utterance id from the transcript.
        Exception: If the label file yields no labels at all.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--audio-dirs",
        nargs="+",
        default=["-"],
        required=True,
        help="input directories with audio files",
    )
    parser.add_argument(
        "--labels",
        required=True,
        help="aggregated input labels with format per line",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument(
        "--spm-model",
        required=True,
        help="sentencepiece model to use for encoding",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument(
        "--dictionary",
        required=True,
        help="file to load fairseq dictionary from",
        type=argparse.FileType("r", encoding="UTF-8"),
    )
    parser.add_argument("--audio-format", choices=["flac", "wav"], default="wav")
    parser.add_argument(
        "--output",
        required=True,
        type=argparse.FileType("w"),
        help="path to save json output",
    )
    args = parser.parse_args()

    sp = spm.SentencePieceProcessor()
    # Only the path of the opened file is used; the model is loaded by name.
    sp.Load(args.spm_model.name)

    tgt_dict = Dictionary.load(args.dictionary)

    labels = {}
    for line_no, line in enumerate(args.labels, start=1):
        if not line.strip():
            # Tolerate blank lines instead of failing on tuple unpacking.
            continue
        utt_id, sep, label = line.partition(" ")
        if not sep:
            raise ValueError(
                f"{args.labels.name}:{line_no}: expected '<utt_id> <label>', "
                f"got: {line!r}"
            )
        # The label is stored exactly as read (including any trailing
        # newline), matching what was passed to process_sample before.
        labels[utt_id] = label
    if not labels:
        # The original referenced the nonexistent `args.labels_path`, which
        # raised AttributeError instead of this message.
        raise Exception(f"No labels found in {args.labels.name}")

    Sample = namedtuple("Sample", "aud_path utt_id")
    samples = []
    # Compare the real extension so that e.g. "foowav" is not mistaken for a
    # wav file; this also replaces the former always-false splitext check.
    wanted_ext = "." + args.audio_format
    for path, _, files in chain.from_iterable(
        os.walk(audio_dir) for audio_dir in args.audio_dirs
    ):
        for f in files:
            utt_id, ext = os.path.splitext(f)
            if ext != wanted_ext or utt_id not in labels:
                continue
            samples.append(Sample(os.path.join(path, f), utt_id))

    utts = {}
    num_cpu = multiprocessing.cpu_count()
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_cpu) as executor:
        future_to_sample = {
            executor.submit(
                process_sample, s.aud_path, labels[s.utt_id], s.utt_id, sp, tgt_dict
            ): s
            for s in samples
        }
        for future in concurrent.futures.as_completed(future_to_sample):
            sample = future_to_sample[future]
            try:
                data = future.result()
            except Exception as exc:
                # Best effort: report the failing sample and keep going so a
                # single undecodable file does not abort the whole run.
                print("generated an exception: ", exc, sample)
            else:
                utts.update(data)
    json.dump({"utts": utts}, args.output, indent=4)

# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

What have you tried?

I tried printing the paths of the audio files that fail to decode, but I still don't understand why the error occurs.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant