diff --git a/.faq/FAQ.md b/.faq/FAQ.md new file mode 100644 index 000000000..24516eace --- /dev/null +++ b/.faq/FAQ.md @@ -0,0 +1,20 @@ + +# Frequently Asked Questions + +{%- for question in questions %} +- [{{ question.title }}](#{{ question.slug }}) +{%- endfor %} + + +{%- for question in questions %} + + +## {{ question.title }} + +{{ question.body }} + +{%- endfor %} + +
+ +Generated by [FAQtory](https://github.com/willmcgugan/faqtory) diff --git a/.faq/suggest.md b/.faq/suggest.md new file mode 100644 index 000000000..19e7b6d69 --- /dev/null +++ b/.faq/suggest.md @@ -0,0 +1,34 @@ +Thank you for your issue. + +{%- if questions -%} +{% if questions|length == 1 %} +We found the following entry in the [FAQ]({{ faq_url }}) which you may find helpful: +{%- else %} +We found the following entries in the [FAQ]({{ faq_url }}) which you may find helpful: +{%- endif %} + +{% for question in questions %} +- [{{ question.title }}]({{ faq_url }}#{{ question.slug }}) +{%- endfor %} + +{%- else -%} +You might want to check the [FAQ]({{ faq_url }}) if you haven't done so already. +{%- endif %} + +Feel free to close this issue if you found an answer in the FAQ. + +If your issue is a feature request, please read [this](https://xyproblem.info/) first and update your request accordingly, if needed. + +If your issue is a bug report, please provide a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) as a link to a self-contained [Google Colab](https://colab.research.google.com/) notebook containing everything needed to reproduce the bug: + - installation + - data preparation + - model download + - etc. + +Providing an MRE will increase your chance of getting an answer from the community (either maintainers or other power users). + +Companies relying on `pyannote.audio` in production may contact [me](https://herve.niderb.fr) via email regarding: +* paid scientific consulting around speaker diarization and speech processing in general; +* custom models and tailored features (via the local tech transfer office). 
+ +> This is an automated reply, generated by [FAQtory](https://github.com/willmcgugan/faqtory) diff --git a/.github/stale.yml b/.github/stale.yml index dc90e5a1c..3cb76e884 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,7 +1,7 @@ # Number of days of inactivity before an issue becomes stale -daysUntilStale: 60 +daysUntilStale: 180 # Number of days of inactivity before a stale issue is closed -daysUntilClose: 7 +daysUntilClose: 30 # Issues with these labels will never be considered stale exemptLabels: - pinned diff --git a/.github/workflows/new_issue.yml b/.github/workflows/new_issue.yml new file mode 100644 index 000000000..b8477dc16 --- /dev/null +++ b/.github/workflows/new_issue.yml @@ -0,0 +1,29 @@ +name: issues +on: + issues: + types: [opened] +jobs: + add-comment: + runs-on: ubuntu-latest + permissions: + issues: write + steps: + - uses: actions/checkout@v3 + with: + ref: develop + - name: Install FAQtory + run: pip install FAQtory + - name: Run Suggest + env: + TITLE: ${{ github.event.issue.title }} + run: faqtory suggest "$TITLE" > suggest.md + - name: Read suggest.md + id: suggest + uses: juliangruber/read-file-action@v1 + with: + path: ./suggest.md + - name: Suggest FAQ + uses: peter-evans/create-or-update-comment@a35cf36e5301d70b76f316e867e7788a55a31dae + with: + issue-number: ${{ github.event.issue.number }} + body: ${{ steps.suggest.outputs.content }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 52aad4944..90a4302c6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,9 +2,9 @@ name: Tests on: push: - branches: [ develop ] + branches: [develop] pull_request: - branches: [ develop ] + branches: [develop] jobs: build: @@ -13,28 +13,21 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9] + python-version: [3.8, 3.9, "3.10"] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - 
python-version: ${{ matrix.python-version }} - - name: Install libsndfile - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt-get install libsndfile1 - - name: Install pyannote.audio - run: | + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install libsndfile + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt-get update + sudo apt-get install libsndfile1 + - name: Install pyannote.audio + run: | pip install -e .[dev,testing] - - name: Test with pytest - run: | - export PYANNOTE_DATABASE_CONFIG=$GITHUB_WORKSPACE/tests/data/database.yml - pytest --cov-report=xml - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - with: - file: ./coverage.xml - env_vars: PYTHON - name: codecov-pyannote-audio - fail_ci_if_error: false + - name: Test with pytest + run: | + pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d1a6a64cd..549e46ad0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: args: ["--profile", "black"] # Formatting, Whitespace, etc - - repo: git://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.2.3 hooks: - id: trailing-whitespace diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..c444380d7 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,128 @@ +# Changelog + +## Version 3.0.0 (2023-09-26) + +### Features and improvements + + - feat(pipeline): send pipeline to device with `pipeline.to(device)` + - feat(pipeline): add `return_embeddings` option to `SpeakerDiarization` pipeline + - feat(pipeline): make `segmentation_batch_size` and `embedding_batch_size` mutable in `SpeakerDiarization` pipeline (they now default to `1`) + - feat(pipeline): add progress hook to pipelines + - feat(task): add 
[powerset](https://www.isca-speech.org/archive/interspeech_2023/plaquet23_interspeech.html) support to `SpeakerDiarization` task + - feat(task): add support for multi-task models + - feat(task): add support for label scope in speaker diarization task + - feat(task): add support for missing classes in multi-label segmentation task + - feat(model): add segmentation model based on torchaudio self-supervised representation + - feat(pipeline): check version compatibility at load time + - improve(task): load metadata as tensors rather than pyannote.core instances + - improve(task): improve error message on missing specifications + +### Breaking changes + + - BREAKING(task): rename `Segmentation` task to `SpeakerDiarization` + - BREAKING(pipeline): pipeline defaults to CPU (use `pipeline.to(device)`) + - BREAKING(pipeline): remove `SpeakerSegmentation` pipeline (use `SpeakerDiarization` pipeline) + - BREAKING(pipeline): remove `segmentation_duration` parameter from `SpeakerDiarization` pipeline (defaults to `duration` of segmentation model) + - BREAKING(task): remove support for variable chunk duration for segmentation tasks + - BREAKING(pipeline): remove support for `FINCHClustering` and `HiddenMarkovModelClustering` + - BREAKING(setup): drop support for Python 3.7 + - BREAKING(io): channels are now 0-indexed (used to be 1-indexed) + - BREAKING(io): multi-channel audio is no longer downmixed to mono by default. + You should update how `pyannote.audio.core.io.Audio` is instantiated: + * replace `Audio()` by `Audio(mono="downmix")`; + * replace `Audio(mono=True)` by `Audio(mono="downmix")`; + * replace `Audio(mono=False)` by `Audio()`. + - BREAKING(model): get rid of (flaky) `Model.introspection` + If, for some weird reason, you wrote some custom code based on that, + you should instead rely on `Model.example_output`. 
+ - BREAKING(interactive): remove support for Prodigy recipes + + +### Fixes and improvements + + - fix(pipeline): fix reproducibility issue with Ampere CUDA devices + - fix(pipeline): fix support for IOBase audio + - fix(pipeline): fix corner case with no speaker + - fix(train): prevent metadata preparation from happening twice + - fix(task): fix support for "balance" option + - improve(task): shorten and improve structure of Tensorboard tags + +### Dependencies update + + - setup: switch to torch 2.0+, torchaudio 2.0+, soundfile 0.12+, lightning 2.0+, torchmetrics 0.11+ + - setup: switch to pyannote.core 5.0+, pyannote.database 5.0+, and pyannote.pipeline 3.0+ + - setup: switch to speechbrain 0.5.14+ + +## Version 2.1.1 (2022-10-27) + + - BREAKING(pipeline): rewrite speaker diarization pipeline + - feat(pipeline): add option to optimize for DER variant + - feat(clustering): add support for NeMo speaker embedding + - feat(clustering): add FINCH clustering + - feat(clustering): add min_cluster_size hparams to AgglomerativeClustering + - feat(hub): add support for private/gated models + - setup(hub): switch to latest huggingface_hub API + - fix(pipeline): fix support for missing reference in Resegmentation pipeline + - fix(clustering): fix corner case where HMM.fit finds too few states + +## Version 2.0.1 (2022-07-20) + + - BREAKING: complete rewrite + - feat: much better performance + - feat: Python-first API + - feat: pretrained pipelines (and models) on Huggingface model hub + - feat: multi-GPU training with pytorch-lightning + - feat: data augmentation with torch-audiomentations + - feat: Prodigy recipe for model-assisted audio annotation + +## Version 1.1.2 (2021-01-28) + + - fix: make sure master branch is used to load pretrained models (#599) + +## Version 1.1 (2020-11-08) + + - last release before complete rewriting + +## Version 1.0.1 (2018-07-19) + + - fix: fix regression in Precomputed.__call__ (#110, #105) + +## Version 1.0 (2018-07-03) + + - chore: switch 
from keras to pytorch (with tensorboard support) + - improve: faster & better training (`AutoLR`, advanced learning rate schedulers, improved batch generators) + - feat: add tunable speaker diarization pipeline (with its own tutorial) + - chore: drop support for Python 2 (use Python 3.6 or later) + +## Version 0.3.1 (2017-07-06) + + - feat: add python 3 support + - chore: rewrite neural speaker embedding using autograd + - feat: add new embedding architectures + - feat: add new embedding losses + - chore: switch to Keras 2 + - doc: add tutorial for (MFCC) feature extraction + - doc: add tutorial for (LSTM-based) speech activity detection + - doc: add tutorial for (LSTM-based) speaker change detection + - doc: add tutorial for (TristouNet) neural speaker embedding + +## Version 0.2.1 (2017-03-28) + + - feat: add LSTM-based speech activity detection + - feat: add LSTM-based speaker change detection + - improve: refactor LSTM-based speaker embedding + - feat: add librosa basic support + - feat: add SMORMS3 optimizer + +## Version 0.1.4 (2016-09-26) + + - feat: add 'covariance_type' option to BIC segmentation + +## Version 0.1.3 (2016-09-23) + + - chore: rename sequence generator in preparation of the release of + TristouNet reproducible research package. 
+ +## Version 0.1.2 (2016-09-22) + + - first public version diff --git a/FAQ.md b/FAQ.md new file mode 100644 index 000000000..f192b2939 --- /dev/null +++ b/FAQ.md @@ -0,0 +1,54 @@ + +# Frequently Asked Questions +- [Can I apply pretrained pipelines on audio already loaded in memory?](#can-i-apply-pretrained-pipelines-on-audio-already-loaded-in-memory) +- [Can I use gated models (and pipelines) offline?](#can-i-use-gated-models-(and-pipelines)-offline) +- [Does pyannote support streaming speaker diarization?](#does-pyannote-support-streaming-speaker-diarization) +- [How can I improve performance?](#how-can-i-improve-performance) +- [How does one spell and pronounce pyannote.audio?](#how-does-one-spell-and-pronounce-pyannoteaudio) + + +## Can I apply pretrained pipelines on audio already loaded in memory? + +Yes: read [this tutorial](tutorials/applying_a_pipeline.ipynb) until the end. + + +## Can I use gated models (and pipelines) offline? + +**Short answer**: yes, see [this tutorial](tutorials/applying_a_model.ipynb) for models and [that one](tutorials/applying_a_pipeline.ipynb) for pipelines. + +**Long answer**: gating models and pipelines allows [me](https://herve.niderb.fr) to know a bit more about `pyannote.audio` user base and eventually help me write grant proposals to make `pyannote.audio` even better. So, please fill gating forms as precisely as possible. + +For instance, before gating `pyannote/speaker-diarization`, I had no idea that so many people were relying on it in production. Hint: sponsors are more than welcome! Maintaining open source libraries is time consuming. + +That being said, this whole authentication process does not prevent you from using official `pyannote.audio` models offline (i.e. without going through the authentication process in every `docker run ...` or whatever you are using in production): see [this tutorial](tutorials/applying_a_model.ipynb) for models and [that one](tutorials/applying_a_pipeline.ipynb) for pipelines. 
+ + +## Does pyannote support streaming speaker diarization? + +**Short answer:** not out of the box, no. + +**Long answer:** [I](https://herve.niderb.fr) am looking for sponsors to add this feature. In the meantime, [`diart`](https://github.com/juanmc2005/StreamingSpeakerDiarization) is the closest you can get from a streaming `pyannote.audio`. You might also be interested in [this blog post](https://herve.niderb.fr/fastpages/2021/08/05/Streaming-voice-activity-detection-with-pyannote.html) about streaming voice activity detection based on `pyannote.audio`. + + +## How can I improve performance? + +**Long answer:** + +1. Manually annotate dozens of conversations as precisely as possible. +2. Separate them into train (80%), development (10%) and test (10%) subsets. +3. Setup the data for use with [`pyannote.database`](https://github.com/pyannote/pyannote-database#speaker-diarization). +4. Follow [this recipe](https://github.com/pyannote/pyannote-audio/blob/develop/tutorials/adapting_pretrained_pipeline.ipynb). +5. Enjoy. + +**Also:** [I am available](https://herve.niderb.fr) for contracting to help you with that. + + +## How does one spell and pronounce pyannote.audio? + +📝 Written in lower case: `pyannote.audio` (or `pyannote` if you are lazy). Not `PyAnnote` nor `PyAnnotate` (sic). +📢 Pronounced like the french verb `pianoter`. `pi` like in `pi`ano, not `py` like in `py`thon. +🎹 `pianoter` means to play the piano (hence the logo 🤯). + +
+ +Generated by [FAQtory](https://github.com/willmcgugan/faqtory) diff --git a/README.md b/README.md index fc779a2c9..13418df8d 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,32 @@ -# Neural speaker diarization with `pyannote.audio` +Using `pyannote.audio` open-source toolkit in production? +Make the most of it thanks to our [consulting services](https://herve.niderb.fr/consulting.html). -`pyannote.audio` is an open-source toolkit written in Python for speaker diarization. Based on [PyTorch](pytorch.org) machine learning framework, it provides a set of trainable end-to-end neural building blocks that can be combined and jointly optimized to build speaker diarization pipelines. +# `pyannote.audio` speaker diarization toolkit + +`pyannote.audio` is an open-source toolkit written in Python for speaker diarization. Based on [PyTorch](https://pytorch.org) machine learning framework, it comes with state-of-the-art [pretrained models and pipelines](https://hf.co/pyannote) that can be further finetuned to your own data for even better performance.

-## TL;DR [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pyannote/pyannote-audio/blob/develop/tutorials/intro.ipynb) +## TL;DR + +1. Install [`pyannote.audio`](https://github.com/pyannote/pyannote-audio) `3.0` with `pip install pyannote.audio` +2. Accept [`pyannote/segmentation-3.0`](https://hf.co/pyannote/segmentation-3.0) user conditions +3. Accept [`pyannote/speaker-diarization-3.0`](https://hf.co/pyannote/speaker-diarization-3.0) user conditions +4. Create access token at [`hf.co/settings/tokens`](https://hf.co/settings/tokens). ```python -# instantiate pretrained speaker diarization pipeline from pyannote.audio import Pipeline -pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization") +pipeline = Pipeline.from_pretrained( + "pyannote/speaker-diarization-3.0", + use_auth_token="HUGGINGFACE_ACCESS_TOKEN_GOES_HERE") + +# send pipeline to GPU (when available) +import torch +pipeline.to(torch.device("cuda")) # apply pretrained pipeline diarization = pipeline("audio.wav") @@ -21,41 +34,24 @@ diarization = pipeline("audio.wav") # print the result for turn, _, speaker in diarization.itertracks(yield_label=True): print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}") -# start=0.2s stop=1.5s speaker_A -# start=1.8s stop=3.9s speaker_B -# start=4.2s stop=5.7s speaker_A +# start=0.2s stop=1.5s speaker_0 +# start=1.8s stop=3.9s speaker_1 +# start=4.2s stop=5.7s speaker_0 # ... ``` -## What's new in `pyannote.audio` 2.0 +## Highlights -For version 2.x of `pyannote.audio`, [I](https://herve.niderb.fr) decided to rewrite almost everything from scratch. 
-Highlights of this release are: - -- :exploding_head: much better performance (see [Benchmark](#benchmark)) -- :snake: Python-first API - :hugs: pretrained [pipelines](https://hf.co/models?other=pyannote-audio-pipeline) (and [models](https://hf.co/models?other=pyannote-audio-model)) on [:hugs: model hub](https://huggingface.co/pyannote) +- :exploding_head: state-of-the-art performance (see [Benchmark](#benchmark)) +- :snake: Python-first API - :zap: multi-GPU training with [pytorch-lightning](https://pytorchlightning.ai/) -- :control_knobs: data augmentation with [torch-audiomentations](https://github.com/asteroid-team/torch-audiomentations) -- :boom: [Prodigy](https://prodi.gy/) recipes for model-assisted audio annotation - -## Installation - -Only Python 3.8+ is officially supported (though it might work with Python 3.7) - -```bash -conda create -n pyannote python=3.8 -conda activate pyannote - -# pytorch 1.11 is required for speechbrain compatibility -# (see https://pytorch.org/get-started/previous-versions/#v1110) -conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 -c pytorch -pip install pyannote.audio -``` ## Documentation +- [Changelog](CHANGELOG.md) +- [Frequently asked questions](FAQ.md) - Models - Available tasks explained - [Applying a pretrained model](tutorials/applying_a_model.ipynb) @@ -63,73 +59,61 @@ pip install pyannote.audio - Pipelines - Available pipelines explained - [Applying a pretrained pipeline](tutorials/applying_a_pipeline.ipynb) + - [Adapting a pretrained pipeline to your own data](tutorials/adapting_pretrained_pipeline.ipynb) - [Training a pipeline](tutorials/voice_activity_detection.ipynb) - Contributing - [Adding a new model](tutorials/add_your_own_model.ipynb) - [Adding a new task](tutorials/add_your_own_task.ipynb) - Adding a new pipeline - Sharing pretrained models and pipelines -- Miscellaneous - - [Training with `pyannote-audio-train` command line tool](tutorials/training_with_cli.md) - - [Annotating your own 
data with Prodigy](tutorials/prodigy.md) - - [Speaker verification](tutorials/speaker_verification.ipynb) - - Visualization and debugging - -## Frequently asked questions - -#### How does one capitalize and pronounce the name of this awesome library? - -📝 Written in lower case: `pyannote.audio` (or `pyannote` if you are lazy). Not `PyAnnote` nor `PyAnnotate` (*sic*). -📢 [Pronounced](https://www.howtopronounce.com/french/pianote) like the french verb *pianoter*. *pi* like in **pi**ano, not *py* like in **py**thon. -🎹 *pianoter* means *to play the piano* (hence the logo 🤯). - -#### **[Pretrained pipelines](https://huggingface.co/models?other=pyannote-audio-pipeline) do not produce good results on my data. What can I do?** - -1. [Annotate](https://github.com/pyannote/pyannote-audio/blob/develop/tutorials/prodigy.md) dozens of conversations manually and separate them into development and test subsets in [`pyannote.database`](https://github.com/pyannote/pyannote-database#speaker-diarization). -2. [Optimize the hyper-parameters](https://github.com/pyannote/pyannote-audio/blob/develop/tutorials/voice_activity_detection.ipynb) of the pretained pipeline using the development set. If performance is still not good enough, go to step 3. -3. Annotate hundreds of conversations manually and set them up as training subset in `pyannote.database`. -4. [Fine-tune](https://github.com/pyannote/pyannote-audio/blob/develop/tutorials/training_a_model.ipynb) the models (on which the pipeline relies) using the training set. -5. [Optimize the hyper-parameters](https://github.com/pyannote/pyannote-audio/blob/develop/tutorials/voice_activity_detection.ipynb) of the pipeline using the fine-tuned models using the development set. If performance is still not good enough, go back to step 3. 
- +- Blog + - 2022-12-02 > ["How I reached 1st place at Ego4D 2022, 1st place at Albayzin 2022, and 6th place at VoxSRC 2022 speaker diarization challenges"](tutorials/adapting_pretrained_pipeline.ipynb) + - 2022-10-23 > ["One speaker segmentation model to rule them all"](https://herve.niderb.fr/fastpages/2022/10/23/One-speaker-segmentation-model-to-rule-them-all) + - 2021-08-05 > ["Streaming voice activity detection with pyannote.audio"](https://herve.niderb.fr/fastpages/2021/08/05/Streaming-voice-activity-detection-with-pyannote.html) +- Videos + - [Introduction to speaker diarization](https://umotion.univ-lemans.fr/video/9513-speech-segmentation-and-speaker-diarization/) / JSALT 2023 summer school / 90 min + - [Speaker segmentation model](https://www.youtube.com/watch?v=wDH2rvkjymY) / Interspeech 2021 / 3 min + - [First release of pyannote.audio](https://www.youtube.com/watch?v=37R_R82lfwA) / ICASSP 2020 / 8 min ## Benchmark -Out of the box, `pyannote.audio` default speaker diarization pipeline is expected to be much better (and faster) in v2.0 than in v1.1.: - -| Dataset | DER% with v1.1 | DER% with v2.0 | Relative improvement | -| ----------- | -------------- | -------------- | -------------------- | -| AMI | 29.7% | 18.2% | 38% | -| DIHARD | 29.2% | 21.0% | 28% | -| VoxConverse | 21.5% | 12.6% | 41% | - -A more detailed benchmark is available [here](https://hf.co/pyannote/speaker-diarization). +Out of the box, `pyannote.audio` speaker diarization [pipeline](https://hf.co/pyannote/speaker-diarization-3.0) v3.0 is expected to be much better (and faster) than v2.x. 
+Those numbers are diarization error rates (in %): + +| Dataset \ Version | v1.1 | v2.0 | [v2.1](https://hf.co/pyannote/speaker-diarization-2.1) | [v3.0](https://hf.co/pyannote/speaker-diarization-3.0) | Premium | +| ---------------------- | ---- | ---- | ------ | ------ | --------- | +| AISHELL-4 | - | 14.6 | 14.1 | 12.3 | 12.3 | +| AliMeeting (channel 1) | - | - | 27.4 | 24.3 | 19.4 | +| AMI (IHM) | 29.7 | 18.2 | 18.9 | 19.0 | 16.7 | +| AMI (SDM) | - | 29.0 | 27.1 | 22.2 | 20.1 | +| AVA-AVD | - | - | - | 49.1 | 42.7 | +| DIHARD 3 (full) | 29.2 | 21.0 | 26.9 | 21.7 | 17.0 | +| MSDWild | - | - | - | 24.6 | 20.4 | +| REPERE (phase2) | - | 12.6 | 8.2 | 7.8 | 7.8 | +| VoxConverse (v0.3) | 21.5 | 12.6 | 11.2 | 11.3 | 9.5 | ## Citations If you use `pyannote.audio` please use the following citations: ```bibtex -@inproceedings{Bredin2020, - Title = {{pyannote.audio: neural building blocks for speaker diarization}}, - Author = {{Bredin}, Herv{\'e} and {Yin}, Ruiqing and {Coria}, Juan Manuel and {Gelly}, Gregory and {Korshunov}, Pavel and {Lavechin}, Marvin and {Fustes}, Diego and {Titeux}, Hadrien and {Bouaziz}, Wassim and {Gill}, Marie-Philippe}, - Booktitle = {ICASSP 2020, IEEE International Conference on Acoustics, Speech, and Signal Processing}, - Year = {2020}, +@inproceedings{Plaquet23, + author={Alexis Plaquet and Hervé Bredin}, + title={{Powerset multi-class cross entropy loss for neural speaker diarization}}, + year=2023, + booktitle={Proc. INTERSPEECH 2023}, } ``` ```bibtex -@inproceedings{Bredin2021, - Title = {{End-to-end speaker segmentation for overlap-aware resegmentation}}, - Author = {{Bredin}, Herv{\'e} and {Laurent}, Antoine}, - Booktitle = {Proc. Interspeech 2021}, - Year = {2021}, +@inproceedings{Bredin23, + author={Hervé Bredin}, + title={{pyannote.audio 2.1 speaker diarization pipeline: principle, benchmark, and recipe}}, + year=2023, + booktitle={Proc. 
INTERSPEECH 2023}, } ``` -## Support - -For commercial enquiries and scientific consulting, please contact [me](mailto:herve@niderb.fr). - ## Development The commands below will setup pre-commit hooks and packages needed for developing the `pyannote.audio` library. @@ -139,9 +123,8 @@ pip install -e .[dev,testing] pre-commit install ``` -Tests rely on a set of debugging files available in [`test/data`](test/data) directory. -Set `PYANNOTE_DATABASE_CONFIG` environment variable to `test/data/database.yml` before running tests: +## Test ```bash -PYANNOTE_DATABASE_CONFIG=tests/data/database.yml pytest +pytest ``` diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst deleted file mode 100644 index be69c3f54..000000000 --- a/doc/source/changelog.rst +++ /dev/null @@ -1,75 +0,0 @@ -######### -Changelog -######### - -Version 2.0.1 (2022-07-20) -~~~~~~~~~~~~~~~~~~~~~~~~ - - - BREAKING: complete rewrite - - feat: much better performance - - feat: Python-first API - - feat: pretrained pipelines (and models) on Huggingface model hub - - feat: multi-GPU training with pytorch-lightning - - feat: data augmentation with torch-audiomentations - - feat: Prodigy recipe for model-assisted audio annotation - -Version 1.1.2 (2021-01-28) -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - fix: make sure master branch is used to load pretrained models (#599) - -Version 1.1 (2020-11-08) -~~~~~~~~~~~~~~~~~~~~~~~~ - - - last release before complete rewriting - -Version 1.0.1 (2018--07-19) -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - fix: fix regression in Precomputed.__call__ (#110, #105) - -Version 1.0 (2018-07-03) -~~~~~~~~~~~~~~~~~~~~~~~~ - - - chore: switch from keras to pytorch (with tensorboard support) - - improve: faster & better traning (`AutoLR`, advanced learning rate schedulers, improved batch generators) - - feat: add tunable speaker diarization pipeline (with its own tutorial) - - chore: drop support for Python 2 (use Python 3.6 or later) - -Version 0.3.1 (2017-07-06) 
-~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - feat: add python 3 support - - chore: rewrite neural speaker embedding using autograd - - feat: add new embedding architectures - - feat: add new embedding losses - - chore: switch to Keras 2 - - doc: add tutorial for (MFCC) feature extraction - - doc: add tutorial for (LSTM-based) speech activity detection - - doc: add tutorial for (LSTM-based) speaker change detection - - doc: add tutorial for (TristouNet) neural speaker embedding - -Version 0.2.1 (2017-03-28) -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - feat: add LSTM-based speech activity detection - - feat: add LSTM-based speaker change detection - - improve: refactor LSTM-based speaker embedding - - feat: add librosa basic support - - feat: add SMORMS3 optimizer - -Version 0.1.4 (2016-09-26) -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - feat: add 'covariance_type' option to BIC segmentation - -Version 0.1.3 (2016-09-23) -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - chore: rename sequence generator in preparation of the release of - TristouNet reproducible research package. - -Version 0.1.2 (2016-09-22) -~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - first public version diff --git a/doc/source/index.rst b/doc/source/index.rst index 69fabbf08..925a460ce 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -9,6 +9,8 @@ Installation :: + $ conda create -n pyannote python=3.10 + $ conda activate pyannote $ pip install pyannote.audio @@ -17,5 +19,3 @@ API documentation .. toctree:: :maxdepth: 2 - - changelog diff --git a/faq.yml b/faq.yml new file mode 100644 index 000000000..f473198d5 --- /dev/null +++ b/faq.yml @@ -0,0 +1,7 @@ +# FAQtory settings + +faq_url: "https://github.com/pyannote/pyannote-audio/blob/develop/FAQ.md" # Replace this with the URL to your FAQ.md! 
+ +questions_path: "./questions" # Where questions should be stored +output_path: "./FAQ.md" # Where FAQ.md should be generated +templates_path: ".faq" # Path to templates diff --git a/notebook/augmentation.ipynb b/notebook/augmentation.ipynb index 7af1250e4..656eae4a7 100644 --- a/notebook/augmentation.ipynb +++ b/notebook/augmentation.ipynb @@ -12,7 +12,7 @@ " preprocessors={\"audio\": FileFinder()})\n", "\n", "from pyannote.audio.core.io import Audio\n", - "audio = Audio(sample_rate=16000, mono=True)\n", + "audio = Audio(sample_rate=16000, mono=\"downmix\")\n", "file = next(protocol.test())\n", "\n", "from pyannote.core import Segment\n", diff --git a/notebook/example.ipynb b/notebook/example.ipynb index 468f30b96..549656d19 100644 --- a/notebook/example.ipynb +++ b/notebook/example.ipynb @@ -2,18 +2,28 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'Debug.SpeakerDiarization.Debug' found in /Users/hbredin/Development/pyannote/pyannote-audio/tests/data/database.yml does not define the 'scope' of speaker labels (file, database, or global). Setting it to 'file'.\n", + "'Debug.SpeakerDiarization.Bug1237' found in /Users/hbredin/Development/pyannote/pyannote-audio/tests/data/database.yml does not define the 'scope' of speaker labels (file, database, or global). 
Setting it to 'file'.\n" + ] + } + ], "source": [ - "from pyannote.database import get_protocol, FileFinder\n", - "protocol = get_protocol('Debug.SpeakerDiarization.Debug', \n", - " preprocessors={\"audio\": FileFinder()})" + "from pyannote.database import registry, FileFinder\n", + "registry.load_database('../tests/data/database.yml')\n", + "protocol = registry.get_protocol('Debug.SpeakerDiarization.Debug', \n", + " preprocessors={\"audio\": FileFinder()})" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -24,26 +34,133 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from pyannote.audio.utils.preview import listen\n", "listen(next(protocol.train()))" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "## Voice activity detection" + "## Voice activity detection" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Protocol Debug.SpeakerDiarization.Debug does not precompute the output of torchaudio.info(): adding a 'torchaudio.info' preprocessor for you to speed up dataloaders. 
See pyannote.database documentation on how to do that yourself.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "Missing logger folder: /Users/hbredin/Development/pyannote/pyannote-audio/notebook/lightning_logs\n", + "\n", + " | Name | Type | Params | In sizes | Out sizes \n", + "----------------------------------------------------------------------------------------------------------------------\n", + "0 | mfcc | MFCC | 0 | [32, 1, 32000] | [32, 1, 40, 161] \n", + "1 | lstm | LSTM | 18.9 K | [32, 161, 40] | [[32, 161, 64], [[2, 32, 32], [2, 32, 32]]]\n", + "2 | classifier | Linear | 65 | [32, 161, 64] | [32, 161, 1] \n", + "3 | activation | Sigmoid | 0 | [32, 161, 1] | [32, 161, 1] \n", + "4 | validation_metric | MetricCollection | 0 | ? | ? \n", + "----------------------------------------------------------------------------------------------------------------------\n", + "19.0 K Trainable params\n", + "0 Non-trainable params\n", + "19.0 K Total params\n", + "0.076 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "37e122a182e544648e65bed1f71ae722", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4e8a349a91e447a49ea0d93cccfd266c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "90c5c47fd1ce4ee98f9729258fab9692", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + 
"output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], "source": [ "from pyannote.audio.tasks import VoiceActivityDetection\n", "vad = VoiceActivityDetection(protocol, duration=2., batch_size=32, num_workers=4)\n", @@ -52,26 +169,6 @@ "_ = trainer.fit(model)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Speaker change detection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pyannote.audio.tasks import SpeakerChangeDetection\n", - "scd = SpeakerChangeDetection(protocol, duration=2., batch_size=32, num_workers=4)\n", - "model = SimpleSegmentationModel(task=scd)\n", - "trainer = pl.Trainer(max_epochs=1)\n", - "_ = trainer.fit(model)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -81,9 +178,82 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params | In sizes | Out sizes \n", + "----------------------------------------------------------------------------------------------------------------------\n", + "0 | mfcc | MFCC | 0 | [32, 1, 32000] | [32, 1, 40, 161] \n", + "1 | lstm | LSTM | 18.9 K | [32, 161, 40] | [[32, 161, 64], [[2, 32, 32], [2, 32, 32]]]\n", + "2 | classifier | Linear | 65 | [32, 161, 64] | [32, 161, 1] \n", + "3 | activation | Sigmoid | 0 | [32, 161, 1] | [32, 161, 1] \n", + "4 | validation_metric | MetricCollection | 0 | ? | ? 
\n", + "----------------------------------------------------------------------------------------------------------------------\n", + "19.0 K Trainable params\n", + "0 Non-trainable params\n", + "19.0 K Total params\n", + "0.076 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9a8333c0ebdf41a79482d81e94bf0e76", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "96c422e9449e40fa82a7f274cc6e5e16", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1f343adebc5a407ab7b33b5b88094edf", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], "source": [ "from pyannote.audio.tasks import OverlappedSpeechDetection\n", "ovl = OverlappedSpeechDetection(protocol, duration=2., batch_size=32, num_workers=4)\n", @@ -93,45 +263,154 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "## Segmentation" + "## (Local) speaker diarization" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "data": { + 
"application/vnd.jupyter.widget-view+json": { + "model_id": "574568dce865407e912423c55dfb002d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params | In sizes | Out sizes \n", + "----------------------------------------------------------------------------------------------------------------------\n", + "0 | mfcc | MFCC | 0 | [32, 1, 32000] | [32, 1, 40, 161] \n", + "1 | lstm | LSTM | 18.9 K | [32, 161, 40] | [[32, 161, 64], [[2, 32, 32], [2, 32, 32]]]\n", + "2 | classifier | Linear | 195 | [32, 161, 64] | [32, 161, 3] \n", + "3 | activation | Sigmoid | 0 | [32, 161, 3] | [32, 161, 3] \n", + "4 | validation_metric | MetricCollection | 0 | ? | ? \n", + "----------------------------------------------------------------------------------------------------------------------\n", + "19.1 K Trainable params\n", + "0 Non-trainable params\n", + "19.1 K Total params\n", + "0.077 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - 27.32% of all chunks contain no speech at all.\n", + " - 66.79% contain 1 speaker or less\n", + " - 91.25% contain 2 speakers or less\n", + " - 98.04% contain 3 speakers or less\n", + " - 100.00% contain 4 speakers or less\n", + "Setting `max_speakers_per_chunk` to 3. 
You can override this value (or avoid this estimation step) by passing `max_speakers_per_chunk=3` to the task constructor.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "423b6758f1c84f3b8ece70744862fb66", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e85cb23d204c4876a82a8202edc273c5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b8f5929201654acf83fbca919a5ac63b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], "source": [ - "from pyannote.audio.tasks import Segmentation\n", - "seg = Segmentation(protocol, duration=2., batch_size=32, num_workers=4)\n", + "from pyannote.audio.tasks import SpeakerDiarization\n", + "seg = SpeakerDiarization(protocol, duration=2., batch_size=32, num_workers=4)\n", "model = SimpleSegmentationModel(task=seg)\n", "trainer = pl.Trainer(max_epochs=1)\n", "_ = trainer.fit(model)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Speaker tracking" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pyannote.audio.tasks import SpeakerTracking\n", - "spk = SpeakerTracking(protocol, duration=2., batch_size=32, num_workers=4)\n", - "model = SimpleSegmentationModel(task=spk)\n", - "trainer = pl.Trainer(max_epochs=1)\n", - "_ = trainer.fit(model)" - ] - }, { "cell_type": "markdown", 
"metadata": {}, @@ -141,9 +420,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "Loading Debug.SpeakerDiarization.Debug training labels: 10file [00:00, 363.55file/s]\n", + "\n", + " | Name | Type | Params | In sizes | Out sizes \n", + "----------------------------------------------------------------------------------------------------------------------\n", + "0 | mfcc | MFCC | 0 | [32, 1, 32000] | [32, 1, 40, 161] \n", + "1 | lstm | LSTM | 18.9 K | [32, 161, 40] | [[32, 161, 64], [[2, 32, 32], [2, 32, 32]]]\n", + "2 | loss_func | ArcFaceLoss | 832 | ? | ? \n", + "3 | validation_metric | MetricCollection | 0 | ? | ? \n", + "----------------------------------------------------------------------------------------------------------------------\n", + "19.8 K Trainable params\n", + "0 Non-trainable params\n", + "19.8 K Total params\n", + "0.079 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b34c5cbe6c9c409fb5572b0be9598076", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "71077814aa154ba3b4892e8be13a98fe", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], "source": [ "from pyannote.audio.tasks import SpeakerEmbedding\n", "emb = 
SpeakerEmbedding(protocol, duration=2., num_workers=4)\n", @@ -169,7 +507,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/notebook/inference.ipynb b/notebook/inference.ipynb index 77e02a27c..b33e64282 100644 --- a/notebook/inference.ipynb +++ b/notebook/inference.ipynb @@ -104,19 +104,6 @@ "scores = inference.crop(dev_file, Segment(10, 15))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# inference with progress bar\n", - "inference = Inference(model, step=0.1, batch_size=32, progress_hook='Processing...')\n", - "scores = inference(dev_file)\n", - "inference = Inference(model, step=0.1, batch_size=1, progress_hook=True)\n", - "scores = inference.crop(dev_file, Segment(10, 15))" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/pyannote/audio/cli/evaluate.py b/pyannote/audio/cli/evaluate.py index 57f5c0149..a5ab682c5 100644 --- a/pyannote/audio/cli/evaluate.py +++ b/pyannote/audio/cli/evaluate.py @@ -53,14 +53,17 @@ def evaluate(cfg: DictConfig) -> Optional[float]: main_task = progress.add_task(protocol.name, total=len(files)) file_task = progress.add_task("Processing", total=1.0) - def progress_hook(completed: int, total: int): + def progress_hook(completed: int = None, total: int = None): progress.update(file_task, completed=completed / total) - inference = Inference(model, device=device, progress_hook=progress_hook) + inference = Inference(model, device=device) warm_up = cfg.warm_up / inference.duration def hypothesis(file: ProtocolFile): - return Inference.trim(binarize(inference(file)), warm_up=(warm_up, warm_up)) + return Inference.trim( + binarize(inference(file, hook=progress_hook)), + warm_up=(warm_up, warm_up), + ) metric = DiscreteDiarizationErrorRate() diff --git a/pyannote/audio/cli/pretrained.py b/pyannote/audio/cli/pretrained.py index 95a439b3f..8b8aae587 100644 
--- a/pyannote/audio/cli/pretrained.py +++ b/pyannote/audio/cli/pretrained.py @@ -26,6 +26,4 @@ def pretrained(checkpoint: Text): - return Model.from_pretrained( - checkpoint, map_location=lambda storage, loc: storage - ) + return Model.from_pretrained(checkpoint, map_location=lambda storage, loc: storage) diff --git a/pyannote/audio/cli/train.py b/pyannote/audio/cli/train.py index 6e04fcde5..9ab8b1658 100644 --- a/pyannote/audio/cli/train.py +++ b/pyannote/audio/cli/train.py @@ -26,10 +26,11 @@ import hydra from hydra.utils import instantiate +from lightning.pytorch import seed_everything from omegaconf import DictConfig, OmegaConf # from pyannote.audio.core.callback import GraduallyUnfreeze -from pyannote.database import FileFinder, get_protocol +from pyannote.database import FileFinder, registry from pytorch_lightning.callbacks import ( EarlyStopping, LearningRateMonitor, @@ -37,7 +38,6 @@ RichProgressBar, ) from pytorch_lightning.loggers import TensorBoardLogger -from pytorch_lightning.utilities.seed import seed_everything from torch_audiomentations.utils.config import from_dict as get_augmentation from pyannote.audio.core.io import get_torchaudio_info @@ -45,18 +45,21 @@ @hydra.main(config_path="train_config", config_name="config") def train(cfg: DictConfig) -> Optional[float]: - # make sure to set the random seed before the instantiation of Trainer # so that each model initializes with the same weights when using DDP. 
seed = int(os.environ.get("PL_GLOBAL_SEED", "0")) seed_everything(seed=seed) + # load databases into registry + for database_yml in cfg.registry.split(","): + registry.load_database(database_yml) + # instantiate training protocol with optional preprocessors preprocessors = {"audio": FileFinder(), "torchaudio.info": get_torchaudio_info} if "preprocessor" in cfg: preprocessor = instantiate(cfg.preprocessor) preprocessors[preprocessor.preprocessed_key] = preprocessor - protocol = get_protocol(cfg.protocol, preprocessors=preprocessors) + protocol = registry.get_protocol(cfg.protocol, preprocessors=preprocessors) # instantiate data augmentation augmentation = ( @@ -96,7 +99,11 @@ def configure_optimizers(self): model.configure_optimizers = MethodType(configure_optimizers, model) - callbacks = [RichProgressBar(), LearningRateMonitor(logging_interval="step")] + # avoid creating big log files + callbacks = [ + RichProgressBar(refresh_rate=20, leave=True), + LearningRateMonitor(), + ] if fine_tuning: # TODO: configure layer freezing @@ -108,7 +115,7 @@ def configure_optimizers(self): checkpoint = ModelCheckpoint( monitor=monitor, mode=direction, - save_top_k=None if monitor is None else 5, + save_top_k=None if monitor is None else 1, every_n_epochs=1, save_last=True, save_weights_only=False, @@ -126,6 +133,7 @@ def configure_optimizers(self): patience=100, strict=True, verbose=False, + check_finite=True, ) callbacks.append(early_stopping) diff --git a/pyannote/audio/cli/train_config/config.yaml b/pyannote/audio/cli/train_config/config.yaml index f939f39e1..d5b761cc9 100644 --- a/pyannote/audio/cli/train_config/config.yaml +++ b/pyannote/audio/cli/train_config/config.yaml @@ -1,7 +1,8 @@ +registry: ??? protocol: ??? 
defaults: - - task: SpeakerSegmentation + - task: SpeakerDiarization - model: PyanNet - optimizer: Adam - scheduler: CosineAnnealingWarmRestarts diff --git a/pyannote/audio/cli/train_config/model/SSeRiouSS.yaml b/pyannote/audio/cli/train_config/model/SSeRiouSS.yaml new file mode 100644 index 000000000..73f7f963a --- /dev/null +++ b/pyannote/audio/cli/train_config/model/SSeRiouSS.yaml @@ -0,0 +1,13 @@ +# @package _group_ +_target_: pyannote.audio.models.segmentation.SSeRiouSS +wav2vec: WAVLM_BASE +wav2vec_layer: -1 +lstm: + hidden_size: 128 + num_layers: 4 + bidirectional: true + monolithic: true + dropout: 0.5 +linear: + hidden_size: 128 + num_layers: 2 diff --git a/pyannote/audio/cli/train_config/optimizer/AdamW.yaml b/pyannote/audio/cli/train_config/optimizer/AdamW.yaml new file mode 100644 index 000000000..f917fb4ed --- /dev/null +++ b/pyannote/audio/cli/train_config/optimizer/AdamW.yaml @@ -0,0 +1,7 @@ +# @package _group_ +_target_: torch.optim.AdamW +lr: 1e-3 +betas: [0.9, 0.999] +eps: 1e-08 +weight_decay: 0.01 +amsgrad: False diff --git a/pyannote/audio/cli/train_config/task/SpeakerSegmentation.yaml b/pyannote/audio/cli/train_config/task/MultiLabelSegmentation.yaml similarity index 56% rename from pyannote/audio/cli/train_config/task/SpeakerSegmentation.yaml rename to pyannote/audio/cli/train_config/task/MultiLabelSegmentation.yaml index b7b6bdc02..aceb8cb22 100644 --- a/pyannote/audio/cli/train_config/task/SpeakerSegmentation.yaml +++ b/pyannote/audio/cli/train_config/task/MultiLabelSegmentation.yaml @@ -1,11 +1,9 @@ # @package _group_ -_target_: pyannote.audio.tasks.Segmentation -duration: 5.0 +_target_: pyannote.audio.tasks.MultiLabelSegmentation +duration: 3.0 warm_up: 0.0 balance: null weight: null batch_size: 32 num_workers: null pin_memory: False -loss: "bce" -vad_loss: "bce" diff --git a/pyannote/audio/cli/train_config/task/SpeakerDiarization.yaml b/pyannote/audio/cli/train_config/task/SpeakerDiarization.yaml new file mode 100644 index 
000000000..d9ec04a57 --- /dev/null +++ b/pyannote/audio/cli/train_config/task/SpeakerDiarization.yaml @@ -0,0 +1,8 @@ +# @package _group_ +_target_: pyannote.audio.tasks.SpeakerDiarization +duration: 5.0 +max_speakers_per_chunk: 3 +max_speakers_per_frame: 2 +batch_size: 32 +num_workers: 10 +pin_memory: False diff --git a/pyannote/audio/cli/train_config/trainer/default.yaml b/pyannote/audio/cli/train_config/trainer/default.yaml index eeb5b85b9..ac3a60ff4 100644 --- a/pyannote/audio/cli/train_config/trainer/default.yaml +++ b/pyannote/audio/cli/train_config/trainer/default.yaml @@ -2,46 +2,34 @@ _target_: pytorch_lightning.Trainer accelerator: auto accumulate_grad_batches: 1 -amp_backend: native -auto_lr_find: False -auto_scale_batch_size: False -auto_select_gpus: True -benchmark: False -check_val_every_n_epoch: 1 -detect_anomaly: False +benchmark: null # TODO: automatically set to True when using fixed duration chunks deterministic: False +check_val_every_n_epoch: 1 devices: auto +detect_anomaly: False enable_checkpointing: True enable_model_summary: True enable_progress_bar: True fast_dev_run: False -gpus: null -gradient_clip_val: 0 +gradient_clip_val: null gradient_clip_algorithm: norm -ipus: null limit_predict_batches: 1.0 limit_test_batches: 1.0 limit_train_batches: 1.0 limit_val_batches: 1.0 log_every_n_steps: 50 max_epochs: 1000 -max_steps: null +max_steps: -1 max_time: null min_epochs: 1 min_steps: null -move_metrics_to_cpu: False -multiple_trainloader_mode: max_size_cycle num_nodes: 1 -num_processes: 1 num_sanity_val_steps: 2 overfit_batches: 0.0 precision: 32 profiler: null reload_dataloaders_every_n_epochs: 0 -replace_sampler_ddp: True +use_distributed_sampler: True # TODO: check what this does exactly strategy: null sync_batchnorm: False -tpu_cores: null -track_grad_norm: -1 val_check_interval: 1.0 -weights_save_path: null diff --git a/pyannote/audio/core/inference.py b/pyannote/audio/core/inference.py index b664174b5..dcf21868d 100644 --- 
a/pyannote/audio/core/inference.py +++ b/pyannote/audio/core/inference.py @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2020-2021 CNRS +# Copyright (c) 2020- CNRS # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -23,24 +23,30 @@ import math import warnings from pathlib import Path -from typing import Any, Callable, List, Optional, Text, Tuple, Union +from typing import Callable, List, Optional, Text, Tuple, Union import numpy as np import torch +import torch.nn as nn +import torch.nn.functional as F from einops import rearrange from pyannote.core import Segment, SlidingWindow, SlidingWindowFeature from pytorch_lightning.utilities.memory import is_oom_error from pyannote.audio.core.io import AudioFile -from pyannote.audio.core.model import Model +from pyannote.audio.core.model import Model, Specifications from pyannote.audio.core.task import Resolution +from pyannote.audio.utils.multi_task import map_with_specifications from pyannote.audio.utils.permutation import mae_cost_func, permutate -from pyannote.audio.utils.progress import InferenceProgressHook +from pyannote.audio.utils.powerset import Powerset +from pyannote.audio.utils.reproducibility import fix_reproducibility -TaskName = Union[Text, None] +class BaseInference: + pass -class Inference: + +class Inference(BaseInference): """Inference Parameters @@ -50,29 +56,26 @@ class Inference: window : {"sliding", "whole"}, optional Use a "sliding" window and aggregate the corresponding outputs (default) or just one (potentially long) window covering the "whole" file or chunk. - skip_aggregation : bool, optional - Do not aggregate outputs when using "sliding" window. Defaults to False. duration : float, optional Chunk duration, in seconds. Defaults to duration used for training the model. Has no effect when `window` is "whole". step : float, optional Step between consecutive chunks, in seconds. 
Defaults to warm-up duration when greater than 0s, otherwise 10% of duration. Has no effect when `window` is "whole". - batch_size : int, optional - Batch size. Larger values make inference faster. Defaults to 32. - device : torch.device, optional - Device used for inference. Defaults to `model.device`. - In case `device` and `model.device` are different, model is sent to device. pre_aggregation_hook : callable, optional When a callable is provided, it is applied to the model output, just before aggregation. Takes a (num_chunks, num_frames, dimension) numpy array as input and returns a modified (num_chunks, num_frames, other_dimension) numpy array passed to overlap-add aggregation. - progress_hook : {callable, True, str}, optional - When a callable is provided, it is called everytime a batch is processed - with two integer arguments: - - the number of chunks that have been processed so far - - the total number of chunks - Set to True (or a descriptive string) to display a tqdm progress bar. + skip_aggregation : bool, optional + Do not aggregate outputs when using "sliding" window. Defaults to False. + skip_conversion: bool, optional + In case a task has been trained with `powerset` mode, output is automatically + converted to `multi-label`, unless `skip_conversion` is set to True. + batch_size : int, optional + Batch size. Larger values (should) make inference faster. Defaults to 32. + device : torch.device, optional + Device used for inference. Defaults to `model.device`. + In case `device` and `model.device` are different, model is sent to device. 
use_auth_token : str, optional When loading a private huggingface.co model, set `use_auth_token` to True or to a string containing your hugginface.co authentication @@ -83,71 +86,92 @@ def __init__( self, model: Union[Model, Text, Path], window: Text = "sliding", - skip_aggregation: bool = False, - device: torch.device = None, duration: float = None, step: float = None, - batch_size: int = 32, pre_aggregation_hook: Callable[[np.ndarray], np.ndarray] = None, - progress_hook: Union[bool, Text, Callable[[int, int], Any]] = False, + skip_aggregation: bool = False, + skip_conversion: bool = False, + device: torch.device = None, + batch_size: int = 32, use_auth_token: Union[Text, None] = None, ): + # ~~~~ model ~~~~~ self.model = ( model if isinstance(model, Model) else Model.from_pretrained( - Path(model), + model, map_location=device, strict=False, use_auth_token=use_auth_token, ) ) - if window not in ["sliding", "whole"]: - raise ValueError('`window` must be "sliding" or "whole".') - - specifications = self.model.specifications - if specifications.resolution == Resolution.FRAME and window == "whole": - warnings.warn( - 'Using "whole" `window` inference with a frame-based model might lead to bad results ' - 'and huge memory consumption: it is recommended to set `window` to "sliding".' 
- ) - - self.window = window - self.skip_aggregation = skip_aggregation - if device is None: device = self.model.device self.device = device - self.pre_aggregation_hook = pre_aggregation_hook - self.model.eval() self.model.to(self.device) - # chunk duration used during training specifications = self.model.specifications - training_duration = specifications.duration - if duration is None: - duration = training_duration - elif training_duration != duration: + # ~~~~ sliding window ~~~~~ + + if window not in ["sliding", "whole"]: + raise ValueError('`window` must be "sliding" or "whole".') + + if window == "whole" and any( + s.resolution == Resolution.FRAME for s in specifications + ): + warnings.warn( + 'Using "whole" `window` inference with a frame-based model might lead to bad results ' + 'and huge memory consumption: it is recommended to set `window` to "sliding".' + ) + self.window = window + + training_duration = next(iter(specifications)).duration + duration = duration or training_duration + if training_duration != duration: warnings.warn( f"Model was trained with {training_duration:g}s chunks, and you requested " f"{duration:g}s chunks for inference: this might lead to suboptimal results." 
) self.duration = duration - self.warm_up = specifications.warm_up + # ~~~~ powerset to multilabel conversion ~~~~ + + self.skip_conversion = skip_conversion + + conversion = list() + for s in specifications: + if s.powerset and not skip_conversion: + c = Powerset(len(s.classes), s.powerset_max_classes) + else: + c = nn.Identity() + conversion.append(c.to(self.device)) + + if isinstance(specifications, Specifications): + self.conversion = conversion[0] + else: + self.conversion = nn.ModuleList(conversion) + + # ~~~~ overlap-add aggregation ~~~~~ + + self.skip_aggregation = skip_aggregation + self.pre_aggregation_hook = pre_aggregation_hook + + self.warm_up = next(iter(specifications)).warm_up # Use that many seconds on the left- and rightmost parts of each chunk # to warm up the model. While the model does process those left- and right-most # parts, only the remaining central part of each chunk is used for aggregating # scores during inference. # step between consecutive chunks - if step is None: - step = 0.1 * self.duration if self.warm_up[0] == 0.0 else self.warm_up[0] + step = step or ( + 0.1 * self.duration if self.warm_up[0] == 0.0 else self.warm_up[0] + ) if step > self.duration: raise ValueError( @@ -159,17 +183,20 @@ def __init__( self.batch_size = batch_size - if callable(progress_hook): - pass - elif isinstance(progress_hook, Text): - progress_hook = InferenceProgressHook(desc=progress_hook) - elif progress_hook: - progress_hook = InferenceProgressHook() - else: - progress_hook = None - self.progress_hook = progress_hook + def to(self, device: torch.device) -> "Inference": + """Send internal model to `device`""" + + if not isinstance(device, torch.device): + raise TypeError( + f"`device` must be an instance of `torch.device`, got `{type(device).__name__}`" + ) - def infer(self, chunks: torch.Tensor) -> np.ndarray: + self.model.to(device) + self.conversion.to(device) + self.device = device + return self + + def infer(self, chunks: torch.Tensor) -> 
Union[np.ndarray, Tuple[np.ndarray]]: """Forward pass Takes care of sending chunks to right device and outputs back to CPU @@ -181,11 +208,11 @@ def infer(self, chunks: torch.Tensor) -> np.ndarray: Returns ------- - outputs : (batch_size, ...) np.ndarray + outputs : (tuple of) (batch_size, ...) np.ndarray Model output. """ - with torch.no_grad(): + with torch.inference_mode(): try: outputs = self.model(chunks.to(self.device)) except RuntimeError as exception: @@ -197,9 +224,19 @@ def infer(self, chunks: torch.Tensor) -> np.ndarray: else: raise exception - return outputs.cpu().numpy() + def __convert(output: torch.Tensor, conversion: nn.Module, **kwargs): + return conversion(output).cpu().numpy() + + return map_with_specifications( + self.model.specifications, __convert, outputs, self.conversion + ) - def slide(self, waveform: torch.Tensor, sample_rate: int) -> SlidingWindowFeature: + def slide( + self, + waveform: torch.Tensor, + sample_rate: int, + hook: Optional[Callable], + ) -> Union[SlidingWindowFeature, Tuple[SlidingWindowFeature]]: """Slide model on a waveform Parameters @@ -208,26 +245,35 @@ def slide(self, waveform: torch.Tensor, sample_rate: int) -> SlidingWindowFeatur Waveform. sample_rate : int Sample rate. + hook: Optional[Callable] + When a callable is provided, it is called everytime a batch is + processed with two keyword arguments: + - `completed`: the number of chunks that have been processed so far + - `total`: the total number of chunks Returns ------- - output : SlidingWindowFeature + output : (tuple of) SlidingWindowFeature Model output. Shape is (num_chunks, dimension) for chunk-level tasks, and (num_frames, dimension) for frame-level tasks. 
""" - window_size: int = round(self.duration * sample_rate) + window_size: int = self.model.audio.get_num_samples(self.duration) step_size: int = round(self.step * sample_rate) - num_channels, num_samples = waveform.shape - - specifications = self.model.specifications - resolution = specifications.resolution - introspection = self.model.introspection - if resolution == Resolution.CHUNK: - frames = SlidingWindow(start=0.0, duration=self.duration, step=self.step) - elif resolution == Resolution.FRAME: - frames = introspection.frames - num_frames_per_chunk, dimension = introspection(window_size) + _, num_samples = waveform.shape + + def __frames( + example_output, specifications: Optional[Specifications] = None + ) -> SlidingWindow: + if specifications.resolution == Resolution.CHUNK: + return SlidingWindow(start=0.0, duration=self.duration, step=self.step) + return example_output.frames + + frames: Union[SlidingWindow, Tuple[SlidingWindow]] = map_with_specifications( + self.model.specifications, + __frames, + self.model.example_output, + ) # prepare complete chunks if num_samples >= window_size: @@ -244,100 +290,159 @@ def slide(self, waveform: torch.Tensor, sample_rate: int) -> SlidingWindowFeatur num_samples - window_size ) % step_size > 0 if has_last_chunk: + # pad last chunk with zeros last_chunk: torch.Tensor = waveform[:, num_chunks * step_size :] + _, last_window_size = last_chunk.shape + last_pad = window_size - last_window_size + last_chunk = F.pad(last_chunk, (0, last_pad)) - outputs: Union[List[np.ndarray], np.ndarray] = list() + def __empty_list(**kwargs): + return list() - if self.progress_hook is not None: - self.progress_hook(0, num_chunks + has_last_chunk) + outputs: Union[ + List[np.ndarray], Tuple[List[np.ndarray]] + ] = map_with_specifications(self.model.specifications, __empty_list) + + if hook is not None: + hook(completed=0, total=num_chunks + has_last_chunk) + + def __append_batch(output, batch_output, **kwargs) -> None: + 
output.append(batch_output) + return # slide over audio chunks in batch for c in np.arange(0, num_chunks, self.batch_size): batch: torch.Tensor = chunks[c : c + self.batch_size] - outputs.append(self.infer(batch)) - if self.progress_hook is not None: - self.progress_hook(c + self.batch_size, num_chunks + has_last_chunk) + + batch_outputs: Union[np.ndarray, Tuple[np.ndarray]] = self.infer(batch) + + _ = map_with_specifications( + self.model.specifications, __append_batch, outputs, batch_outputs + ) + + if hook is not None: + hook(completed=c + self.batch_size, total=num_chunks + has_last_chunk) # process orphan last chunk if has_last_chunk: + last_outputs = self.infer(last_chunk[None]) + + _ = map_with_specifications( + self.model.specifications, __append_batch, outputs, last_outputs + ) + + if hook is not None: + hook( + completed=num_chunks + has_last_chunk, + total=num_chunks + has_last_chunk, + ) - last_output = self.infer(last_chunk[None]) + def __vstack(output: List[np.ndarray], **kwargs) -> np.ndarray: + return np.vstack(output) - if specifications.resolution == Resolution.FRAME: - pad = num_frames_per_chunk - last_output.shape[1] - last_output = np.pad(last_output, ((0, 0), (0, pad), (0, 0))) + outputs: Union[np.ndarray, Tuple[np.ndarray]] = map_with_specifications( + self.model.specifications, __vstack, outputs + ) - outputs.append(last_output) - if self.progress_hook is not None: - self.progress_hook( - num_chunks + has_last_chunk, num_chunks + has_last_chunk + def __aggregate( + outputs: np.ndarray, + frames: SlidingWindow, + specifications: Optional[Specifications] = None, + ) -> SlidingWindowFeature: + # skip aggregation when requested, + # or when model outputs just one vector per chunk + # or when model is permutation-invariant (and not post-processed) + if ( + self.skip_aggregation + or specifications.resolution == Resolution.CHUNK + or ( + specifications.permutation_invariant + and self.pre_aggregation_hook is None + ) + ): + frames = SlidingWindow( 
+ start=0.0, duration=self.duration, step=self.step ) + return SlidingWindowFeature(outputs, frames) + + if self.pre_aggregation_hook is not None: + outputs = self.pre_aggregation_hook(outputs) - outputs = np.vstack(outputs) - - # skip aggregation when requested, - # or when model outputs just one vector per chunk - # or when model is permutation-invariant (and not post-processed) - if ( - self.skip_aggregation - or specifications.resolution == Resolution.CHUNK - or ( - specifications.permutation_invariant - and self.pre_aggregation_hook is None + aggregated = self.aggregate( + SlidingWindowFeature( + outputs, + SlidingWindow(start=0.0, duration=self.duration, step=self.step), + ), + frames=frames, + warm_up=self.warm_up, + hamming=True, + missing=0.0, ) - ): - frames = SlidingWindow(start=0.0, duration=self.duration, step=self.step) - return SlidingWindowFeature(outputs, frames) - - if self.pre_aggregation_hook is not None: - outputs = self.pre_aggregation_hook(outputs) - - aggregated = self.aggregate( - SlidingWindowFeature( - outputs, - SlidingWindow(start=0.0, duration=self.duration, step=self.step), - ), - frames=frames, - warm_up=self.warm_up, - hamming=True, - missing=0.0, - ) - if has_last_chunk: - num_frames = aggregated.data.shape[0] - aggregated.data = aggregated.data[: num_frames - pad, :] + # remove padding that was added to last chunk + if has_last_chunk: + aggregated.data = aggregated.crop( + Segment(0.0, num_samples / sample_rate), mode="loose" + ) - return aggregated + return aggregated - def __call__(self, file: AudioFile) -> Union[SlidingWindowFeature, np.ndarray]: + return map_with_specifications( + self.model.specifications, __aggregate, outputs, frames + ) + + def __call__( + self, file: AudioFile, hook: Optional[Callable] = None + ) -> Union[ + Tuple[Union[SlidingWindowFeature, np.ndarray]], + Union[SlidingWindowFeature, np.ndarray], + ]: """Run inference on a whole file Parameters ---------- file : AudioFile Audio file. 
+ hook : callable, optional + When a callable is provided, it is called every time a batch is processed + with two keyword arguments: + - `completed`: the number of chunks that have been processed so far + - `total`: the total number of chunks Returns ------- - output : SlidingWindowFeature or np.ndarray + output : (tuple of) SlidingWindowFeature or np.ndarray Model output, as `SlidingWindowFeature` if `window` is set to "sliding" and `np.ndarray` if is set to "whole". """ + fix_reproducibility(self.device) + waveform, sample_rate = self.model.audio(file) if self.window == "sliding": - return self.slide(waveform, sample_rate) + return self.slide(waveform, sample_rate, hook=hook) + + outputs: Union[np.ndarray, Tuple[np.ndarray]] = self.infer(waveform[None]) - return self.infer(waveform[None])[0] + def __first_sample(outputs: np.ndarray, **kwargs) -> np.ndarray: + return outputs[0] + + return map_with_specifications( + self.model.specifications, __first_sample, outputs + ) def crop( self, file: AudioFile, chunk: Union[Segment, List[Segment]], duration: Optional[float] = None, - ) -> Union[SlidingWindowFeature, np.ndarray]: + hook: Optional[Callable] = None, + ) -> Union[ + Tuple[Union[SlidingWindowFeature, np.ndarray]], + Union[SlidingWindowFeature, np.ndarray], + ]: """Run inference on a chunk or a list of chunks Parameters @@ -354,10 +459,15 @@ def crop( Enforce chunk duration (in seconds). This is a hack to avoid rounding errors that may result in a different number of audio samples for two chunks of the same duration. 
+ hook : callable, optional + When a callable is provided, it is called every time a batch is processed + with two keyword arguments: + - `completed`: the number of chunks that have been processed so far + - `total`: the total number of chunks Returns ------- - output : SlidingWindowFeature or np.ndarray + output : (tuple of) SlidingWindowFeature or np.ndarray Model output, as `SlidingWindowFeature` if `window` is set to "sliding" and `np.ndarray` if is set to "whole". @@ -371,8 +481,9 @@ def crop( >>> inference.crop(file, extended_chunk).crop(chunk_of_interest, returns_data=False) """ - if self.window == "sliding": + fix_reproducibility(self.device) + if self.window == "sliding": if not isinstance(chunk, Segment): start = min(c.start for c in chunk) end = max(c.end for c in chunk) @@ -381,32 +492,37 @@ def crop( waveform, sample_rate = self.model.audio.crop( file, chunk, duration=duration ) - output = self.slide(waveform, sample_rate) - - frames = output.sliding_window - shifted_frames = SlidingWindow( - start=chunk.start, duration=frames.duration, step=frames.step - ) - return SlidingWindowFeature(output.data, shifted_frames) - - elif self.window == "whole": - - if isinstance(chunk, Segment): - waveform, sample_rate = self.model.audio.crop( - file, chunk, duration=duration - ) - else: - waveform = torch.cat( - [self.model.audio.crop(file, c)[0] for c in chunk], dim=1 + outputs: Union[ + SlidingWindowFeature, Tuple[SlidingWindowFeature] + ] = self.slide(waveform, sample_rate, hook=hook) + + def __shift(output: SlidingWindowFeature, **kwargs) -> SlidingWindowFeature: + frames = output.sliding_window + shifted_frames = SlidingWindow( + start=chunk.start, duration=frames.duration, step=frames.step ) + return SlidingWindowFeature(output.data, shifted_frames) - return self.infer(waveform[None])[0] + return map_with_specifications(self.model.specifications, __shift, outputs) + if isinstance(chunk, Segment): + waveform, sample_rate = self.model.audio.crop( + file, chunk, 
duration=duration + ) else: - raise NotImplementedError( - f"Unsupported window type '{self.window}': should be 'sliding' or 'whole'." + waveform = torch.cat( + [self.model.audio.crop(file, c)[0] for c in chunk], dim=1 ) + outputs: Union[np.ndarray, Tuple[np.ndarray]] = self.infer(waveform[None]) + + def __first_sample(outputs: np.ndarray, **kwargs) -> np.ndarray: + return outputs[0] + + return map_with_specifications( + self.model.specifications, __first_sample, outputs + ) + @staticmethod def aggregate( scores: SlidingWindowFeature, @@ -648,7 +764,6 @@ def always_match(this: np.ndarray, that: np.ndarray, cost: float): stitches = [] for C, (chunk, activation) in enumerate(activations): - local_stitch = np.NAN * np.zeros( (sum(lookahead) + 1, num_frames, num_classes) ) @@ -656,7 +771,6 @@ def always_match(this: np.ndarray, that: np.ndarray, cost: float): for c in range( max(0, C - lookahead[0]), min(num_chunks, C + lookahead[1] + 1) ): - # extract common temporal support shift = round((C - c) * num_frames * chunks.step / chunks.duration) @@ -677,7 +791,6 @@ def always_match(this: np.ndarray, that: np.ndarray, cost: float): ) for this, that in enumerate(permutation): - # only stitch under certain condiditions matching = (c == C) or ( match_func( diff --git a/pyannote/audio/core/io.py b/pyannote/audio/core/io.py index 49175f531..0a44e75ea 100644 --- a/pyannote/audio/core/io.py +++ b/pyannote/audio/core/io.py @@ -28,6 +28,7 @@ """ import math +import random import warnings from io import IOBase from pathlib import Path @@ -79,12 +80,14 @@ class Audio: ---------- sample_rate: int, optional Target sampling rate. Defaults to using native sampling rate. - mono : int, optional - Convert multi-channel to mono. Defaults to True. + mono : {'random', 'downmix'}, optional + In case of multi-channel audio, convert to single-channel audio + using one of the following strategies: select one channel at + 'random' or 'downmix' by averaging all channels. 
Usage ----- - >>> audio = Audio(sample_rate=16000, mono=True) + >>> audio = Audio(sample_rate=16000, mono='downmix') >>> waveform, sample_rate = audio({"audio": "/path/to/audio.wav"}) >>> assert sample_rate == 16000 >>> sample_rate = 44100 @@ -147,7 +150,6 @@ def validate_file(file: AudioFile) -> Mapping: raise ValueError(AudioFileDocString) if "waveform" in file: - waveform: Union[np.ndarray, Tensor] = file["waveform"] if len(waveform.shape) != 2 or waveform.shape[0] > waveform.shape[1]: raise ValueError( @@ -163,7 +165,6 @@ def validate_file(file: AudioFile) -> Mapping: file.setdefault("uri", "waveform") elif "audio" in file: - if isinstance(file["audio"], IOBase): return file @@ -174,15 +175,13 @@ def validate_file(file: AudioFile) -> Mapping: file.setdefault("uri", path.stem) else: - raise ValueError( "Neither 'waveform' nor 'audio' is available for this file." ) return file - def __init__(self, sample_rate=None, mono=True): - + def __init__(self, sample_rate=None, mono=None): super().__init__() self.sample_rate = sample_rate self.mono = mono @@ -206,8 +205,13 @@ def downmix_and_resample(self, waveform: Tensor, sample_rate: int) -> Tensor: """ # downmix to mono - if self.mono and waveform.shape[0] > 1: - waveform = waveform.mean(dim=0, keepdim=True) + num_channels = waveform.shape[0] + if num_channels > 1: + if self.mono == "random": + channel = random.randint(0, num_channels - 1) + waveform = waveform[channel : channel + 1] + elif self.mono == "downmix": + waveform = waveform.mean(dim=0, keepdim=True) # resample if (self.sample_rate is not None) and (self.sample_rate != sample_rate): @@ -249,6 +253,18 @@ def get_duration(self, file: AudioFile) -> float: return frames / sample_rate + def get_num_samples(self, duration: float, sample_rate: int = None) -> int: + """Deterministic number of samples from duration and sample rate""" + + sample_rate = sample_rate or self.sample_rate + + if sample_rate is None: + raise ValueError( + "`sample_rate` must be provided to 
compute number of samples." + ) + + return math.floor(duration * sample_rate) + def __call__(self, file: AudioFile) -> Tuple[Tensor, int]: """Obtain waveform @@ -277,10 +293,14 @@ def __call__(self, file: AudioFile) -> Tuple[Tensor, int]: elif "audio" in file: waveform, sample_rate = torchaudio.load(file["audio"]) + # rewind if needed + if isinstance(file["audio"], IOBase): + file["audio"].seek(0) + channel = file.get("channel", None) if channel is not None: - waveform = waveform[channel - 1 : channel] + waveform = waveform[channel : channel + 1] return self.downmix_and_resample(waveform, sample_rate) @@ -347,7 +367,6 @@ def crop( num_frames = end_frame - start_frame if mode == "raise": - if num_frames > frames: raise ValueError( f"requested fixed duration ({duration:6f}s, or {num_frames:d} frames) is longer " @@ -384,10 +403,10 @@ def crop( data, _ = torchaudio.load( file["audio"], frame_offset=start_frame, num_frames=num_frames ) + # rewind if needed if isinstance(file["audio"], IOBase): file["audio"].seek(0) except RuntimeError: - if isinstance(file["audio"], IOBase): msg = "torchaudio failed to seek-and-read in file-like object." 
raise RuntimeError(msg) @@ -408,7 +427,7 @@ def crop( file["sample_rate"] = sample_rate if channel is not None: - data = data[channel - 1 : channel, :] + data = data[channel : channel + 1, :] # pad with zeros if mode == "pad": diff --git a/pyannote/audio/core/model.py b/pyannote/audio/core/model.py index ddf4b2795..bedb7f6c4 100644 --- a/pyannote/audio/core/model.py +++ b/pyannote/audio/core/model.py @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2020-2021 CNRS +# Copyright (c) 2020- CNRS # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -24,6 +24,8 @@ import os import warnings +from dataclasses import dataclass +from functools import cached_property from importlib import import_module from pathlib import Path from typing import Any, Dict, List, Optional, Text, Tuple, Union @@ -33,16 +35,24 @@ import torch import torch.nn as nn import torch.optim -from huggingface_hub import cached_download, hf_hub_url +from huggingface_hub import hf_hub_download +from huggingface_hub.utils import RepositoryNotFoundError +from lightning_fabric.utilities.cloud_io import _load as pl_load from pyannote.core import SlidingWindow -from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.model_summary import ModelSummary -from semver import VersionInfo from torch.utils.data import DataLoader from pyannote.audio import __version__ from pyannote.audio.core.io import Audio -from pyannote.audio.core.task import Problem, Resolution, Specifications, Task +from pyannote.audio.core.task import ( + Problem, + Resolution, + Specifications, + Task, + UnknownSpecificationsError, +) +from pyannote.audio.utils.multi_task import map_with_specifications +from pyannote.audio.utils.version import check_version CACHE_DIR = os.getenv( "PYANNOTE_CACHE", @@ -52,195 +62,16 @@ HF_LIGHTNING_CONFIG_NAME = "config.yaml" +# NOTE: needed for backward 
compatibility to load models trained before pyannote.audio 3.x class Introspection: - """Model introspection - - Parameters - ---------- - min_num_samples: int - Minimum number of input samples - min_num_frames: int - Corresponding minimum number of output frames - inc_num_samples: int - Number of input samples leading to an increase of number of output frames - inc_num_frames: int - Corresponding increase in number of output frames - dimension: int - Output dimension - sample_rate: int - Expected input sample rate - - Usage - ----- - >>> introspection = Introspection.from_model(model) - >>> isinstance(introspection.frames, SlidingWindow) - >>> num_samples = 16000 # 1s at 16kHz - >>> num_frames, dimension = introspection(num_samples) - """ - - def __init__( - self, - min_num_samples: int, - min_num_frames: int, - inc_num_samples: int, - inc_num_frames: int, - dimension: int, - sample_rate: int, - ): - super().__init__() - self.min_num_samples = min_num_samples - self.min_num_frames = min_num_frames - self.inc_num_samples = inc_num_samples - self.inc_num_frames = inc_num_frames - self.dimension = dimension - self.sample_rate = sample_rate + pass - @classmethod - def from_model(cls, model: "Model", task: str = None) -> Introspection: - - specifications = model.specifications - if task is not None: - specifications = specifications[task] - - example_input_array = model.example_input_array - batch_size, num_channels, num_samples = example_input_array.shape - example_input_array = torch.randn( - (batch_size, num_channels, num_samples), - dtype=example_input_array.dtype, - layout=example_input_array.layout, - device=example_input_array.device, - requires_grad=False, - ) - # dichotomic search of "min_num_samples" - lower, upper, min_num_samples = 1, num_samples, None - while True: - num_samples = (lower + upper) // 2 - try: - with torch.no_grad(): - frames = model(example_input_array[:, :, :num_samples]) - if task is not None: - frames = frames[task] - except Exception: - 
lower = num_samples - else: - min_num_samples = num_samples - if specifications.resolution == Resolution.FRAME: - _, min_num_frames, dimension = frames.shape - elif specifications.resolution == Resolution.CHUNK: - _, dimension = frames.shape - else: - # should never happen - pass - upper = num_samples - - if lower + 1 == upper: - break - - # if "min_num_samples" is still None at this point, it means that - # the forward pass always failed and raised an exception. most likely, - # it means that there is a problem with the model definition. - # we try again without catching the exception to help the end user debug - # their model - if min_num_samples is None: - frames = model(example_input_array) - - # corner case for chunk-level tasks - if specifications.resolution == Resolution.CHUNK: - return cls( - min_num_samples=min_num_samples, - min_num_frames=1, - inc_num_samples=0, - inc_num_frames=0, - dimension=dimension, - sample_rate=model.hparams.sample_rate, - ) - - # search reasonable upper bound for "inc_num_samples" - while True: - num_samples = 2 * min_num_samples - example_input_array = torch.randn( - (batch_size, num_channels, num_samples), - dtype=example_input_array.dtype, - layout=example_input_array.layout, - device=example_input_array.device, - requires_grad=False, - ) - with torch.no_grad(): - frames = model(example_input_array) - if task is not None: - frames = frames[task] - num_frames = frames.shape[1] - if num_frames > min_num_frames: - break - - # dichotomic search of "inc_num_samples" - lower, upper = min_num_samples, num_samples - while True: - num_samples = (lower + upper) // 2 - example_input_array = torch.randn( - (batch_size, num_channels, num_samples), - dtype=example_input_array.dtype, - layout=example_input_array.layout, - device=example_input_array.device, - requires_grad=False, - ) - with torch.no_grad(): - frames = model(example_input_array) - if task is not None: - frames = frames[task] - num_frames = frames.shape[1] - if num_frames > 
min_num_frames: - inc_num_frames = num_frames - min_num_frames - inc_num_samples = num_samples - min_num_samples - upper = num_samples - else: - lower = num_samples - - if lower + 1 == upper: - break - - return cls( - min_num_samples=min_num_samples, - min_num_frames=min_num_frames, - inc_num_samples=inc_num_samples, - inc_num_frames=inc_num_frames, - dimension=dimension, - sample_rate=model.hparams.sample_rate, - ) - - def __call__(self, num_samples: int) -> Tuple[int, int]: - """Predict output shape, given number of input samples - - Parameters - ---------- - num_samples : int - Number of input samples. - - Returns - ------- - num_frames : int - Number of output frames - dimension : int - Dimension of output frames - """ - - if num_samples < self.min_num_samples: - return 0, self.dimension - - return ( - self.min_num_frames - + self.inc_num_frames - * ((num_samples - self.min_num_samples + 1) // self.inc_num_samples), - self.dimension, - ) - - @property - def frames(self) -> SlidingWindow: - # HACK to support model trained before 'sample_rate' was an Introspection attribute - sample_rate = getattr(self, "sample_rate", 16000) - step = (self.inc_num_samples / self.inc_num_frames) / sample_rate - return SlidingWindow(start=0.0, step=step, duration=step) +@dataclass +class Output: + num_frames: int + dimension: int + frames: SlidingWindow class Model(pl.LightningModule): @@ -271,42 +102,69 @@ def __init__( self.save_hyperparameters("sample_rate", "num_channels") self.task = task - self.audio = Audio( - sample_rate=self.hparams.sample_rate, mono=self.hparams.num_channels == 1 - ) - - @property - def example_input_array(self) -> torch.Tensor: - batch_size = 3 if self.task is None else self.task.batch_size - duration = 2.0 if self.task is None else self.task.duration - - return torch.randn( - ( - batch_size, - self.hparams.num_channels, - int(self.hparams.sample_rate * duration), - ), - device=self.device, - ) + self.audio = Audio(sample_rate=self.hparams.sample_rate, 
mono="downmix") @property - def task(self): + def task(self) -> Task: return self._task @task.setter - def task(self, task): - self._task = task - del self.introspection + def task(self, task: Task): + # reset (cached) properties when task changes del self.specifications + try: + del self.example_output + except AttributeError: + pass + self._task = task + + def build(self): + # use this method to add task-dependent layers to the model + # (e.g. the final classification and activation layers) + pass @property - def specifications(self): - if self.task is not None: - return self.task.specifications - return self._specifications + def specifications(self) -> Union[Specifications, Tuple[Specifications]]: + if self.task is None: + try: + specifications = self._specifications + + except AttributeError as e: + raise UnknownSpecificationsError( + "Model specifications are not available because it has not been assigned a task yet. " + "Use `model.task = ...` to assign a task to the model." + ) from e + + else: + try: + specifications = self.task.specifications + + except AttributeError as e: + raise UnknownSpecificationsError( + "Task specifications are not available. This is most likely because they depend on " + "the content of the training subset. Use `model.task.setup()` to go over the training " + "subset and fix this, or let lightning trainer do that for you in `trainer.fit(model)`." + ) from e + + return specifications @specifications.setter - def specifications(self, specifications): + def specifications( + self, specifications: Union[Specifications, Tuple[Specifications]] + ): + if not isinstance(specifications, (Specifications, tuple)): + raise ValueError( + "Only regular specifications or tuple of specifications are supported." 
+ ) + + durations = set(s.duration for s in specifications) + if len(durations) > 1: + raise ValueError("All tasks must share the same (maximum) duration.") + + min_durations = set(s.min_duration for s in specifications) + if len(min_durations) > 1: + raise ValueError("All tasks must share the same minimum duration.") + self._specifications = specifications @specifications.deleter @@ -314,39 +172,54 @@ def specifications(self): if hasattr(self, "_specifications"): del self._specifications - def build(self): - # use this method to add task-dependent layers to the model - # (e.g. the final classification and activation layers) - pass + def __example_input_array(self, duration: Optional[float] = None) -> torch.Tensor: + duration = duration or next(iter(self.specifications)).duration + return torch.randn( + ( + 1, + self.hparams.num_channels, + self.audio.get_num_samples(duration), + ), + device=self.device, + ) @property - def introspection(self) -> Introspection: - """Introspection - - Returns - ------- - introspection: Introspection - Model introspection - """ - - if not hasattr(self, "_introspection"): - self._introspection = Introspection.from_model(self) - - return self._introspection + def example_input_array(self) -> torch.Tensor: + return self.__example_input_array() + + @cached_property + def example_output(self) -> Union[Output, Tuple[Output]]: + """Example output""" + example_input_array = self.__example_input_array() + with torch.inference_mode(): + example_output = self(example_input_array) + + def __example_output( + example_output: torch.Tensor, + specifications: Specifications = None, + ) -> Output: + if specifications.resolution == Resolution.FRAME: + _, num_frames, dimension = example_output.shape + frame_duration = specifications.duration / num_frames + frames = SlidingWindow(step=frame_duration, duration=frame_duration) + else: + _, dimension = example_output.shape + num_frames = None + frames = None - @introspection.setter - def 
introspection(self, introspection): - self._introspection = introspection + return Output( + num_frames=num_frames, + dimension=dimension, + frames=frames, + ) - @introspection.deleter - def introspection(self): - if hasattr(self, "_introspection"): - del self._introspection + return map_with_specifications( + self.specifications, __example_output, example_output + ) def setup(self, stage=None): - if stage == "fit": - self.task.setup() + self.task.setup_metadata() # list of layers before adding task-dependent layers before = set((name, id(module)) for name, module in self.named_modules()) @@ -387,8 +260,8 @@ def setup(self, stage=None): # setup custom validation metrics self.task.setup_validation_metric() - # this is to make sure introspection is performed here, once and for all - _ = self.introspection + # cache for later (and to avoid later CUDA error with multiprocessing) + _ = self.example_output # list of layers after adding task-dependent layers after = set((name, id(module)) for name, module in self.named_modules()) @@ -397,7 +270,6 @@ def setup(self, stage=None): self.task_dependent = list(name for name, _ in after - before) def on_save_checkpoint(self, checkpoint): - # put everything pyannote.audio-specific under pyannote.audio # to avoid any future conflicts with pytorch-lightning updates checkpoint["pyannote.audio"] = { @@ -409,82 +281,44 @@ def on_save_checkpoint(self, checkpoint): "module": self.__class__.__module__, "class": self.__class__.__name__, }, - "introspection": self.introspection, "specifications": self.specifications, } - @staticmethod - def check_version(library: Text, theirs: Text, mine: Text): - theirs = VersionInfo.parse(theirs) - mine = VersionInfo.parse(mine) - if theirs.major != mine.major: - warnings.warn( - f"Model was trained with {library} {theirs}, yours is {mine}. " - f"Bad things will probably happen unless you update {library} to {theirs.major}.x." 
- ) - if theirs.minor > mine.minor: - warnings.warn( - f"Model was trained with {library} {theirs}, yours is {mine}. " - f"This should be OK but you might want to update {library}." - ) - def on_load_checkpoint(self, checkpoint: Dict[str, Any]): - - self.check_version( + check_version( "pyannote.audio", checkpoint["pyannote.audio"]["versions"]["pyannote.audio"], __version__, + what="Model", ) - self.check_version( + check_version( "torch", checkpoint["pyannote.audio"]["versions"]["torch"], torch.__version__, + what="Model", ) - self.check_version( - "pytorch-lightning", checkpoint["pytorch-lightning_version"], pl.__version__ + check_version( + "pytorch-lightning", + checkpoint["pytorch-lightning_version"], + pl.__version__, + what="Model", ) self.specifications = checkpoint["pyannote.audio"]["specifications"] + # add task-dependent (e.g. final classifier) layers self.setup() - self.introspection = checkpoint["pyannote.audio"]["introspection"] - - def forward(self, waveforms: torch.Tensor) -> torch.Tensor: + def forward( + self, waveforms: torch.Tensor, **kwargs + ) -> Union[torch.Tensor, Tuple[torch.Tensor]]: msg = "Class {self.__class__.__name__} should define a `forward` method." raise NotImplementedError(msg) - def helper_default_activation(self, specifications: Specifications) -> nn.Module: - """Helper function for default_activation - - Parameters - ---------- - specifications: Specifications - Task specification. - - Returns - ------- - activation : nn.Module - Default activation function. 
- """ - - if specifications.problem == Problem.BINARY_CLASSIFICATION: - return nn.Sigmoid() - - elif specifications.problem == Problem.MONO_LABEL_CLASSIFICATION: - return nn.LogSoftmax(dim=-1) - - elif specifications.problem == Problem.MULTI_LABEL_CLASSIFICATION: - return nn.Sigmoid() - - else: - msg = "TODO: implement default activation for other types of problems" - raise NotImplementedError(msg) - # convenience function to automate the choice of the final activation function - def default_activation(self) -> nn.Module: + def default_activation(self) -> Union[nn.Module, Tuple[nn.Module]]: """Guess default activation function according to task specification * sigmoid for binary classification @@ -493,10 +327,25 @@ def default_activation(self) -> nn.Module: Returns ------- - activation : nn.Module + activation : (tuple of) nn.Module Activation. """ - return self.helper_default_activation(self.specifications) + + def __default_activation(specifications: Specifications = None) -> nn.Module: + if specifications.problem == Problem.BINARY_CLASSIFICATION: + return nn.Sigmoid() + + elif specifications.problem == Problem.MONO_LABEL_CLASSIFICATION: + return nn.LogSoftmax(dim=-1) + + elif specifications.problem == Problem.MULTI_LABEL_CLASSIFICATION: + return nn.Sigmoid() + + else: + msg = "TODO: implement default activation for other types of problems" + raise NotImplementedError(msg) + + return map_with_specifications(self.specifications, __default_activation) # training data logic is delegated to the task because the # model does not really need to know how it is being used. 
@@ -518,15 +367,10 @@ def val_dataloader(self) -> DataLoader: def validation_step(self, batch, batch_idx): return self.task.validation_step(batch, batch_idx) - def validation_epoch_end(self, outputs): - return self.task.validation_epoch_end(outputs) - def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=1e-3) - def _helper_up_to( - self, module_name: Text, requires_grad: bool = False - ) -> List[Text]: + def __up_to(self, module_name: Text, requires_grad: bool = False) -> List[Text]: """Helper function for freeze_up_to and unfreeze_up_to""" tokens = module_name.split(".") @@ -583,7 +427,7 @@ def freeze_up_to(self, module_name: Text) -> List[Text]: If your model does not follow a sequential structure, you might want to use freeze_by_name for more control. """ - return self._helper_up_to(module_name, requires_grad=False) + return self.__up_to(module_name, requires_grad=False) def unfreeze_up_to(self, module_name: Text) -> List[Text]: """Unfreeze model up to specific module @@ -608,9 +452,9 @@ def unfreeze_up_to(self, module_name: Text) -> List[Text]: If your model does not follow a sequential structure, you might want to use freeze_by_name for more control. """ - return self._helper_up_to(module_name, requires_grad=True) + return self.__up_to(module_name, requires_grad=True) - def _helper_by_name( + def __by_name( self, modules: Union[List[Text], Text], recurse: bool = True, @@ -625,7 +469,6 @@ def _helper_by_name( modules = [modules] for name, module in ModelSummary(self, max_depth=-1).named_modules: - if name not in modules: continue @@ -667,7 +510,7 @@ def freeze_by_name( ValueError if at least one of `modules` does not exist. """ - return self._helper_by_name( + return self.__by_name( modules, recurse=recurse, requires_grad=False, @@ -698,7 +541,7 @@ def unfreeze_by_name( ValueError if at least one of `modules` does not exist. 
""" - return self._helper_by_name(modules, recurse=recurse, requires_grad=True) + return self.__by_name(modules, recurse=recurse, requires_grad=True) @classmethod def from_pretrained( @@ -777,32 +620,61 @@ def from_pretrained( model_id = checkpoint revision = None - url = hf_hub_url( - model_id, filename=HF_PYTORCH_WEIGHTS_NAME, revision=revision - ) - path_for_pl = cached_download( - url=url, - library_name="pyannote", - library_version=__version__, - cache_dir=cache_dir, - use_auth_token=use_auth_token, - ) + try: + path_for_pl = hf_hub_download( + model_id, + HF_PYTORCH_WEIGHTS_NAME, + repo_type="model", + revision=revision, + library_name="pyannote", + library_version=__version__, + cache_dir=cache_dir, + # force_download=False, + # proxies=None, + # etag_timeout=10, + # resume_download=False, + use_auth_token=use_auth_token, + # local_files_only=False, + # legacy_cache_layout=False, + ) + except RepositoryNotFoundError: + print( + f""" +Could not download '{model_id}' model. +It might be because the model is private or gated so make +sure to authenticate. Visit https://hf.co/settings/tokens to +create your access token and retry with: + + >>> Model.from_pretrained('{model_id}', + ... use_auth_token=YOUR_AUTH_TOKEN) + +If this still does not work, it might be because the model is gated: +visit https://hf.co/{model_id} to accept the user conditions.""" + ) + return None # HACK Huggingface download counters rely on config.yaml # HACK Therefore we download config.yaml even though we # HACK do not use it. Fails silently in case model does not # HACK have a config.yaml file. 
try: - config_url = hf_hub_url( - model_id, filename=HF_LIGHTNING_CONFIG_NAME, revision=revision - ) - _ = cached_download( - url=config_url, + _ = hf_hub_download( + model_id, + HF_LIGHTNING_CONFIG_NAME, + repo_type="model", + revision=revision, library_name="pyannote", library_version=__version__, cache_dir=cache_dir, + # force_download=False, + # proxies=None, + # etag_timeout=10, + # resume_download=False, use_auth_token=use_auth_token, + # local_files_only=False, + # legacy_cache_layout=False, ) + except Exception: pass diff --git a/pyannote/audio/core/pipeline.py b/pyannote/audio/core/pipeline.py index 90daf297c..f844d584f 100644 --- a/pyannote/audio/core/pipeline.py +++ b/pyannote/audio/core/pipeline.py @@ -22,21 +22,27 @@ import os import warnings +from collections import OrderedDict from collections.abc import Iterator from functools import partial from pathlib import Path -from typing import Callable, List, Optional, Text, Union +from typing import Callable, Dict, List, Optional, Text, Union +import torch import yaml -from huggingface_hub import cached_download, hf_hub_url - -from pyannote.audio import Audio, __version__ -from pyannote.audio.core.io import AudioFile -from pyannote.audio.core.model import CACHE_DIR +from huggingface_hub import hf_hub_download +from huggingface_hub.utils import RepositoryNotFoundError from pyannote.core.utils.helper import get_class_by_name from pyannote.database import FileFinder, ProtocolFile from pyannote.pipeline import Pipeline as _Pipeline +from pyannote.audio import Audio, __version__ +from pyannote.audio.core.inference import BaseInference +from pyannote.audio.core.io import AudioFile +from pyannote.audio.core.model import CACHE_DIR, Model +from pyannote.audio.utils.reproducibility import fix_reproducibility +from pyannote.audio.utils.version import check_version + PIPELINE_PARAMS_NAME = "config.yaml" @@ -77,25 +83,57 @@ def from_pretrained( else: model_id = checkpoint_path revision = None - url = 
hf_hub_url(model_id, filename=PIPELINE_PARAMS_NAME, revision=revision) - - config_yml = cached_download( - url=url, - library_name="pyannote", - library_version=__version__, - cache_dir=cache_dir, - use_auth_token=use_auth_token, - ) + + try: + config_yml = hf_hub_download( + model_id, + PIPELINE_PARAMS_NAME, + repo_type="model", + revision=revision, + library_name="pyannote", + library_version=__version__, + cache_dir=cache_dir, + # force_download=False, + # proxies=None, + # etag_timeout=10, + # resume_download=False, + use_auth_token=use_auth_token, + # local_files_only=False, + # legacy_cache_layout=False, + ) + + except RepositoryNotFoundError: + print( + f""" +Could not download '{model_id}' pipeline. +It might be because the pipeline is private or gated so make +sure to authenticate. Visit https://hf.co/settings/tokens to +create your access token and retry with: + + >>> Pipeline.from_pretrained('{model_id}', + ... use_auth_token=YOUR_AUTH_TOKEN) + +If this still does not work, it might be because the pipeline is gated: +visit https://hf.co/{model_id} to accept the user conditions.""" + ) + return None with open(config_yml, "r") as fp: config = yaml.load(fp, Loader=yaml.SafeLoader) + if "version" in config: + check_version( + "pyannote.audio", config["version"], __version__, what="Pipeline" + ) + # initialize pipeline pipeline_name = config["pipeline"]["name"] Klass = get_class_by_name( pipeline_name, default_module_name="pyannote.pipeline.blocks" ) - pipeline = Klass(**config["pipeline"].get("params", {})) + params = config["pipeline"].get("params", {}) + params.setdefault("use_auth_token", use_auth_token) + pipeline = Klass(**params) # freeze parameters if "freeze" in config: @@ -111,7 +149,6 @@ def from_pretrained( if "preprocessors" in config: preprocessors = {} for key, preprocessor in config.get("preprocessors", {}).items(): - # preprocessors: # key: # name: package.module.ClassName @@ -139,22 +176,98 @@ def from_pretrained( pipeline.preprocessors = 
preprocessors + # send pipeline to specified device + if "device" in config: + device = torch.device(config["device"]) + try: + pipeline.to(device) + except RuntimeError as e: + print(e) + return pipeline - @staticmethod - def setup_hook(file: AudioFile, hook: Optional[Callable] = None) -> Callable: + def __init__(self): + super().__init__() + self._models: Dict[str, Model] = OrderedDict() + self._inferences: Dict[str, BaseInference] = OrderedDict() + + def __getattr__(self, name): + """(Advanced) attribute getter + + Adds support for Model and Inference attributes, + which are iterated over by Pipeline.to() method. + + See pyannote.pipeline.Pipeline.__getattr__. + """ + + if "_models" in self.__dict__: + _models = self.__dict__["_models"] + if name in _models: + return _models[name] + + if "_inferences" in self.__dict__: + _inferences = self.__dict__["_inferences"] + if name in _inferences: + return _inferences[name] + + return super().__getattr__(name) + + def __setattr__(self, name, value): + """(Advanced) attribute setter + + Adds support for Model and Inference attributes, + which are iterated over by Pipeline.to() method. + + See pyannote.pipeline.Pipeline.__setattr__. 
+ """ + + def remove_from(*dicts): + for d in dicts: + if name in d: + del d[name] + + _parameters = self.__dict__.get("_parameters") + _instantiated = self.__dict__.get("_instantiated") + _pipelines = self.__dict__.get("_pipelines") + _models = self.__dict__.get("_models") + _inferences = self.__dict__.get("_inferences") + + if isinstance(value, Model): + if _models is None: + msg = "cannot assign models before Pipeline.__init__() call" + raise AttributeError(msg) + remove_from( + self.__dict__, _inferences, _parameters, _instantiated, _pipelines + ) + _models[name] = value + return + + if isinstance(value, BaseInference): + if _inferences is None: + msg = "cannot assign inferences before Pipeline.__init__() call" + raise AttributeError(msg) + remove_from(self.__dict__, _models, _parameters, _instantiated, _pipelines) + _inferences[name] = value + return + + super().__setattr__(name, value) - if hook is None: + def __delattr__(self, name): + if name in self._models: + del self._models[name] - def hook(*args, **kwargs): - return + elif name in self._inferences: + del self._inferences[name] - hook.missing = True else: - hook = partial(hook, file=file) - hook.missing = False + super().__delattr__(name) - return hook + @staticmethod + def setup_hook(file: AudioFile, hook: Optional[Callable] = None) -> Callable: + def noop(*args, **kwargs): + return + + return partial(hook or noop, file=file) def default_parameters(self): raise NotImplementedError() @@ -181,6 +294,8 @@ def classes(self) -> Union[List, Iterator]: raise NotImplementedError() def __call__(self, file: AudioFile, **kwargs): + fix_reproducibility(getattr(self, "device", torch.device("cpu"))) + if not self.instantiated: # instantiate with default parameters when available try: @@ -208,3 +323,25 @@ def __call__(self, file: AudioFile, **kwargs): file = ProtocolFile(file, lazy=self.preprocessors) return self.apply(file, **kwargs) + + def to(self, device: torch.device): + """Send pipeline to `device`""" + + if 
not isinstance(device, torch.device): + raise TypeError( + f"`device` must be an instance of `torch.device`, got `{type(device).__name__}`" + ) + + for _, pipeline in self._pipelines.items(): + if hasattr(pipeline, "to"): + _ = pipeline.to(device) + + for _, model in self._models.items(): + _ = model.to(device) + + for _, inference in self._inferences.items(): + _ = inference.to(device) + + self.device = device + + return self diff --git a/pyannote/audio/core/task.py b/pyannote/audio/core/task.py index fb42fb7cc..1edfbc35c 100644 --- a/pyannote/audio/core/task.py +++ b/pyannote/audio/core/task.py @@ -23,29 +23,23 @@ from __future__ import annotations -from functools import partial - -try: - from functools import cached_property -except ImportError: - from backports.cached_property import cached_property - import multiprocessing import sys import warnings from dataclasses import dataclass from enum import Enum +from functools import cached_property, partial from numbers import Number -from typing import Dict, List, Optional, Sequence, Text, Tuple, Union +from typing import Dict, List, Literal, Optional, Sequence, Text, Tuple, Union import pytorch_lightning as pl +import scipy.special import torch from pyannote.database import Protocol from torch.utils.data import DataLoader, Dataset, IterableDataset from torch_audiomentations import Identity from torch_audiomentations.core.transforms_interface import BaseWaveformTransform from torchmetrics import Metric, MetricCollection -from typing_extensions import Literal from pyannote.audio.utils.loss import binary_cross_entropy, nll_loss from pyannote.audio.utils.protocol import check_protocol @@ -69,14 +63,20 @@ class Resolution(Enum): CHUNK = 2 # model outputs just one vector for the whole chunk +class UnknownSpecificationsError(Exception): + pass + + @dataclass class Specifications: problem: Problem resolution: Resolution - # chunk duration in seconds. 
- # use None for variable-length chunks - duration: Optional[float] = None + # (maximum) chunk duration in seconds + duration: float + + # (for variable-duration tasks only) minimum chunk duration in seconds + min_duration: Optional[float] = None # use that many seconds on the left- and rightmost parts of each chunk # to warm up the model. This is mostly useful for segmentation tasks. @@ -89,9 +89,43 @@ class Specifications: # (for classification tasks only) list of classes classes: Optional[List[Text]] = None + # (for powerset only) max number of simultaneous classes + # (n choose k with k <= powerset_max_classes) + powerset_max_classes: Optional[int] = None + # whether classes are permutation-invariant (e.g. diarization) permutation_invariant: bool = False + @cached_property + def powerset(self) -> bool: + if self.powerset_max_classes is None: + return False + + if self.problem != Problem.MONO_LABEL_CLASSIFICATION: + raise ValueError( + "`powerset_max_classes` only makes sense with multi-class classification problems." + ) + + return True + + @cached_property + def num_powerset_classes(self) -> int: + # compute number of subsets of size at most "powerset_max_classes" + # e.g. with len(classes) = 3 and powerset_max_classes = 2: + # {}, {0}, {1}, {2}, {0, 1}, {0, 2}, {1, 2} + return int( + sum( + scipy.special.binom(len(self.classes), i) + for i in range(0, self.powerset_max_classes + 1) + ) + ) + + def __len__(self): + return 1 + + def __iter__(self): + yield self + class TrainDataset(IterableDataset): def __init__(self, task: Task): @@ -165,7 +199,7 @@ class Task(pl.LightningDataModule): Attributes ---------- - specifications : Specifications or dict of Specifications + specifications : Specifications or tuple of Specifications Task specifications (available after `Task.setup` has been called.) 
""" @@ -184,7 +218,10 @@ def __init__( super().__init__() # dataset - self.protocol, self.has_validation = check_protocol(protocol) + self.protocol, checks = check_protocol(protocol) + self.has_validation = checks["has_validation"] + self.has_scope = checks["has_scope"] + self.has_classes = checks["has_classes"] # batching self.duration = duration @@ -231,7 +268,28 @@ def prepare_data(self): """ pass - def setup(self, stage: Optional[str] = None): + @property + def specifications(self) -> Union[Specifications, Tuple[Specifications]]: + # setup metadata on-demand the first time specifications are requested and missing + if not hasattr(self, "_specifications"): + self.setup_metadata() + return self._specifications + + @specifications.setter + def specifications( + self, specifications: Union[Specifications, Tuple[Specifications]] + ): + self._specifications = specifications + + @property + def has_setup_metadata(self): + return getattr(self, "_has_setup_metadata", False) + + @has_setup_metadata.setter + def has_setup_metadata(self, value: bool): + self._has_setup_metadata = value + + def setup_metadata(self): """Called at the beginning of training at the very beginning of Model.setup(stage="fit") Notes @@ -241,7 +299,10 @@ def setup(self, stage: Optional[str] = None): If `specifications` attribute has not been set in `__init__`, `setup` is your last chance to set it. """ - pass + + if not self.has_setup_metadata: + self.setup() + self.has_setup_metadata = True def setup_loss_func(self): pass @@ -270,18 +331,6 @@ def train_dataloader(self) -> DataLoader: collate_fn=partial(self.collate_fn, stage="train"), ) - @cached_property - def logging_prefix(self): - - prefix = f"{self.__class__.__name__}-" - if hasattr(self.protocol, "name"): - # "." 
has a special meaning for pytorch-lightning checkpointing - # so we remove dots from protocol names - name_without_dots = "".join(self.protocol.name.split(".")) - prefix += f"{name_without_dots}-" - - return prefix - def default_loss( self, specifications: Specifications, target, prediction, weight=None ) -> torch.Tensor: @@ -314,7 +363,7 @@ def default_loss( ]: return binary_cross_entropy(prediction, target, weight=weight) - elif specifications.problem == Problem.MONO_LABEL_CLASSIFICATION: + elif specifications.problem in [Problem.MONO_LABEL_CLASSIFICATION]: return nll_loss(prediction, target, weight=weight) else: @@ -345,6 +394,11 @@ def common_step(self, batch, batch_idx: int, stage: Literal["train", "val"]): {"loss": loss} """ + if isinstance(self.specifications, tuple): + raise NotImplementedError( + "Default training/validation step is not implemented for multi-task." + ) + # forward pass y_pred = self.model(batch["X"]) @@ -370,12 +424,17 @@ def common_step(self, batch, batch_idx: int, stage: Literal["train", "val"]): # compute loss loss = self.default_loss(self.specifications, y, y_pred, weight=weight) + + # skip batch if something went wrong for some reason + if torch.isnan(loss): + return None + self.model.log( - f"{self.logging_prefix}{stage.capitalize()}Loss", + f"loss/{stage}", loss, on_step=False, on_epoch=True, - prog_bar=True, + prog_bar=False, logger=True, ) return {"loss": loss} @@ -413,9 +472,6 @@ def val_dataloader(self) -> Optional[DataLoader]: def validation_step(self, batch, batch_idx: int): return self.common_step(batch, batch_idx, "val") - def validation_epoch_end(self, outputs): - pass - def default_metric(self) -> Union[Metric, Sequence[Metric], Dict[str, Metric]]: """Default validation metric""" msg = f"Missing '{self.__class__.__name__}.default_metric' method." 
@@ -426,7 +482,7 @@ def metric(self) -> MetricCollection: if self._metric is None: self._metric = self.default_metric() - return MetricCollection(self._metric, prefix=self.logging_prefix) + return MetricCollection(self._metric) def setup_validation_metric(self): metric = self.metric diff --git a/pyannote/audio/interactive/common/commands.png b/pyannote/audio/interactive/common/commands.png deleted file mode 100755 index 29517df5a..000000000 Binary files a/pyannote/audio/interactive/common/commands.png and /dev/null differ diff --git a/pyannote/audio/interactive/common/controller.js b/pyannote/audio/interactive/common/controller.js deleted file mode 100644 index cbffc2899..000000000 --- a/pyannote/audio/interactive/common/controller.js +++ /dev/null @@ -1,367 +0,0 @@ -// The MIT License (MIT) -// -// Copyright (c) 2021 CNRS -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
- -// Can't create constant because prodigy reload this file every batch -var currentRegion = 0; -var regions = null; -var ids = null; -var refresh = true; - -var left = 'ArrowLeft'; -var right = 'ArrowRight'; -var startR = 'Shift'; -var endR = 'Control'; - -var PRECISION = (prodigy.config.precision / 1000); -var BEEP = prodigy.config.beep; -var EXCERPT = 1; - -var keysMap = {}; - -/** -* Handle web audio for beep -* @see beep() -*/ -var audioCtx = new(window.AudioContext || window.webkitAudioContext)(); - -/** -* Makes sure that the document is loaded before executing waitForElement() -* Add a small timeout if wavesurfer is already defined (useful when a new batch is coming), -* the time for prodigy to update wavesurfer. -* @see waitForElement() -*/ -if(document.readyState !== 'loading') { - if(typeof window.wavesurfer !== "undefined"){ - setTimeout(waitForElement,25); - }else{ - waitForElement(); - } -} else { - document.addEventListener('DOMContentLoaded', function () { - waitForElement(); - }); -} - -/** -* Compare if a region is before or after another one -* Useful to sort global variable 'ids' -* @see reloadWave() -* @param {region1} Region object -* @param {region2} Region Object -* @return {number} -1 if region1 start before, 1 otherwise, 0 if the identical -*/ -function compare(region1, region2){ - if(region1.start < region2.start){ - return -1; - }else if (region1.start > region2.start){ - return 1; - }else{ - return 0; - } -} - -/** -* Simulate a click on prodigy's label radio button -* Only useful for "review recipe" -* Note: might break with future versions of Prodigy -* @param {label} Label to click on -*/ -function clickOnLabel(label){ - document.querySelector("input[type=radio][value=\'"+label+"\']").click() -} - -/** -* Update prodigy.content with all regions from window.wavesurfer.regions.list (thus, all regions that can be seen in the interface) -* Discuss in this issue : -* 
https://support.prodi.gy/t/weird-interaction-between-window-prodigy-update-and-wavesurfer/5450 -*/ -function updateContent(){ - var regions = window.wavesurfer.regions.list; - var content = []; - for (var id in regions){ - var region = regions[id]; - content.push({start : region.start, end : region.end, label : region.label, id : region.id, color : region.color}); - } - window.prodigy.update({audio_spans : content}); -} - -/** -* Create a beep sound from scratch -* You can adjust the gain, frequency (here its A 440) and duration -*/ -function beep() { - if(BEEP){ - var oscillator = audioCtx.createOscillator(); - var gainNode = audioCtx.createGain(); - - oscillator.connect(gainNode); - gainNode.connect(audioCtx.destination); - - gainNode.gain.value = 0.1; - oscillator.frequency.value = 440; - oscillator.type = "square"; - - oscillator.start(); - - setTimeout( - function() { - oscillator.stop(); - }, - 150 // FIXME: should depend on the value of "precision" - ); - } -} - -/** -* Change CSS style for the selected region -* @param {e} Region object -*/ -function activeRegion(e){ - e.element.style.borderTop = "3px solid"; - e.element.style.borderBottom = "3px solid"; -} - -/** -* @see activeRegion() -* Undo CSS change -* @param {e} Region object -*/ -function deactiveRegion(e){ - e.element.style.borderTop = ""; - e.element.style.borderBottom = ""; -} - -/** -* Update global variables 'regions' and 'ids' with regions in window.wavesurfer.regions.list -* Put the first one as "active" and update the variable currentRegion -*/ -function reloadWave(){ - regions = window.wavesurfer.regions.list; - ids = Object.values(regions); - ids.sort(compare); - if(ids.length > 0){ - currentRegion = 0; - activeRegion(ids[0]); - } -} - -/** -* Switch selected region -* Update var currentRegion -* Place wavesurfer cursor at the beginning of the new region or the beginning of the file if it's a new prodigy task -* @see activeRegion() / deactiveRegion() -* @param {ids} Ids of the region to be 
selected -*/ -function switchCurrent(newId){ - if(ids.length > 0){ - deactiveRegion(ids[currentRegion]); - currentRegion = newId; - activeRegion(ids[newId]) - if(refresh){ - window.wavesurfer.seekTo(0); - }else{ - var time = (ids[currentRegion].start) / (window.wavesurfer.getDuration()); - window.wavesurfer.seekTo(time); - } - } -} - -/** -* Handle wavesurfer regions -* Add event listener to some wavesurfer event -*/ -function waitForElement(){ - if(typeof window.wavesurfer !== "undefined"){ - reloadWave(); - // Select created region or the first one if it's a new task - window.wavesurfer.on('region-created', function(e){ - setTimeout(function(){ - if(ids.length > 0) deactiveRegion(ids[currentRegion]); - reloadWave(); - if(refresh){ - switchCurrent(0); - }else{ - switchCurrent(ids.indexOf(e)); - } - }, 5); - }); - // Change region label (by remove the old one and create a new one with proper label) - window.wavesurfer.on('region-dblclick',function(e){ - re = window.wavesurfer.addRegion({'start' : e.start,'end' : e.end}); - e.remove(); - window.wavesurfer.fireEvent('region-update-end',re); - }); - // Select region on click - window.wavesurfer.on('region-click',function(e){ - switchCurrent(ids.indexOf(e)); - }); - // Beep when region end - window.wavesurfer.on('region-out',function(e){ - beep(); - }); - // @see updateContent() - window.wavesurfer.on('region-update-end', function(e){ - updateContent(); - }); - // @see updateContent() - // Switch selected region when deleted - window.wavesurfer.on('region-removed',function(e){ - updateContent(); - if(currentRegion == (ids.length - 1)){ - var newId = 0; - }else{ - var newId = currentRegion; - } - reloadWave(); - if(ids.length > 0) switchCurrent(newId); - }); - }else{ - setTimeout(waitForElement, 250); - } -} - -// Check if it's a new prodigy task -document.addEventListener('prodigyanswer', e => { - refresh = true; -}) - -/** -* Keyboard controller -* | Key 1 | Key 2 | Command | -* | ------------- | ------------- | 
------------ | -* | Arrows left/right | [W] | Move Cursor [speed up] | -* | Shift | Arrows left/right | Change start of current segment | -* | Control | Arrows left/right | Change end of current segment | -* | Arrows up/down | | Change current segment to the next/precedent one | -* | Shift | Arrows up/[down] | Create [or remove] segment | -* | Backspace | | Remove current segment | -*/ -document.querySelector('#root').onkeydown = document.querySelector('#root').onkeyup = function(e){ - e = e || event; - keysMap[e.key] = e.type == 'keydown'; - var pos = window.wavesurfer.getCurrentTime(); - var audioEnd = window.wavesurfer.getDuration(); - var region = ids[currentRegion]; - refresh = false; - - // If Left is pressed - if(keysMap[left] && !keysMap[right]){ - // If Shift is pressed - if(keysMap[startR] && !keysMap[endR]){ - // Shortens start if possible - if((region.start - PRECISION) <= 0){ - region.update({'start' : 0}); - window.wavesurfer.fireEvent('region-update-end',region); - window.wavesurfer.play(0, region.end); - }else{ - region.update({'start' : region.start - PRECISION }); - window.wavesurfer.fireEvent('region-update-end',region); - window.wavesurfer.play(region.start, region.end); - } - // If Ctrl is pressed - }else if(keysMap[endR] && !keysMap[startR]){ - var startTime = region.end - EXCERPT; - if(startTime < region.start) startTime = region.start; - // Shortens end if possible - if((region.end - PRECISION) > region.start){ - region.update({'end' : region.end - PRECISION }); - window.wavesurfer.fireEvent('region-update-end',region); - window.wavesurfer.play(startTime, region.end); - } - }else{ - // Else change cursor position - // Speed up naviguation if W is pressed - if(keysMap['w']){ - var time = (pos - PRECISION*2) / audioEnd; - }else{ - var time = (pos - PRECISION) / audioEnd; - } - if(time < 0) time = 0; - window.wavesurfer.pause(); - window.wavesurfer.seekTo(time); - } - // If Right is pressed - }else if(keysMap[right] && !keysMap[left]){ - // If 
Shift is pressed - if(keysMap[startR] && !keysMap[endR]){ - // Extend start if possible - if(region.start + PRECISION < region.end){ - region.update({'start' : region.start + PRECISION }); - window.wavesurfer.fireEvent('region-update-end',region); - window.wavesurfer.play(region.start, region.end); - } - // If Ctrl is pressed - }else if(keysMap[endR] && !keysMap[startR]){ - // Extend end if possible (while keep playing the audio) - if(!window.wavesurfer.isPlaying()){ - var startTime = region.end - EXCERPT; - if(startTime < region.start) startTime = region.start; - }else{ - var startTime = pos; - } - if((region.end + PRECISION) >= audioEnd){ - region.update({'end' : audioEnd }); - window.wavesurfer.fireEvent('region-update-end',region); - }else{ - region.update({'end' : region.end + PRECISION }); - window.wavesurfer.fireEvent('region-update-end',region); - } - window.wavesurfer.play(startTime, region.end); - }else{ - // Else change cursor position - // Speed up naviguation if W is pressed - if(keysMap['w']){ - var time = (pos + PRECISION*2) / audioEnd; - }else{ - var time = (pos + PRECISION) / audioEnd; - } - if(time > 1) time = 1; - window.wavesurfer.pause(); - window.wavesurfer.seekTo(time); - } - // If Up and shift is pressed : new region - }else if (keysMap['ArrowUp'] && keysMap['Shift']){ - var fin = pos + 1; - if(fin > audioEnd) fin = audioEnd; - re = window.wavesurfer.addRegion({'start' : pos,'end' : fin}); - window.wavesurfer.fireEvent('region-update-end',re); - // If Down and Shift or Backspace: delete region - // Check backspace for diarization text field - }else if(keysMap['Backspace'] || (keysMap['ArrowDown'] && keysMap['Shift'])){ - ids[currentRegion].remove(); - // If Up/Down @see switchCurrent - }else if(keysMap['ArrowUp']){ - if(currentRegion == (ids.length - 1)){ - switchCurrent(0); - }else{ - switchCurrent(currentRegion + 1); - } - }else if(keysMap['ArrowDown']){ - if(currentRegion == 0){ - switchCurrent(ids.length - 1); - }else{ - 
switchCurrent(currentRegion - 1); - } - }else if(keysMap['u']){ - reloadWave(); - } -} diff --git a/pyannote/audio/interactive/common/instructions.html b/pyannote/audio/interactive/common/instructions.html deleted file mode 100644 index 0b75a98c1..000000000 --- a/pyannote/audio/interactive/common/instructions.html +++ /dev/null @@ -1,69 +0,0 @@ - - - -

Commands

- - - You have to mark the speech moments with bounding boxes. -
- -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Key 1Key 2Command
Arrows left/right[W]Move Cursor [speed up]
ShiftArrow left/rightChange start of current segment
ControlArrow left/rightChange end of current segment
Arrows up/downChange current segment to the next/precedent one
ShiftArrow up/[down]Create or [remove] segment
BackspaceRemove current segment
SpacebarPlay/pause audio
UUndo all actions
EscapeIgnore this sample
EnterValidate annotation
- - diff --git a/pyannote/audio/interactive/common/regions.js b/pyannote/audio/interactive/common/regions.js deleted file mode 100644 index c40b15cd1..000000000 --- a/pyannote/audio/interactive/common/regions.js +++ /dev/null @@ -1,1362 +0,0 @@ -/*! - * wavesurfer.js regions plugin 5.2.0 (2021-08-16) - * https://wavesurfer-js.org - * @license BSD-3-Clause - */ -(function webpackUniversalModuleDefinition(root, factory) { - if(typeof exports === 'object' && typeof module === 'object') - module.exports = factory(); - else if(typeof define === 'function' && define.amd) - define("WaveSurfer", [], factory); - else if(typeof exports === 'object') - exports["WaveSurfer"] = factory(); - else - root["WaveSurfer"] = root["WaveSurfer"] || {}, root["WaveSurfer"]["regions"] = factory(); -})(self, function() { -return /******/ (() => { // webpackBootstrap -/******/ "use strict"; -/******/ var __webpack_modules__ = ({ - -/***/ "./src/plugin/regions/index.js": -/*!*************************************!*\ - !*** ./src/plugin/regions/index.js ***! - \*************************************/ -/***/ ((module, exports, __webpack_require__) => { - - - -Object.defineProperty(exports, "__esModule", ({ - value: true -})); -exports.default = void 0; - -var _region = __webpack_require__(/*! ./region.js */ "./src/plugin/regions/region.js"); - -function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) { symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); } keys.push.apply(keys, symbols); } return keys; } - -function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? 
arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; } - -function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } - -function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } - -function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } - -function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; } - -/** - * Regions are visual overlays on waveform that can be used to play and loop - * portions of audio. Regions can be dragged and resized. - * - * Visual customization is possible via CSS (using the selectors - * `.wavesurfer-region` and `.wavesurfer-handle`). 
- * - * @implements {PluginClass} - * @extends {Observer} - * - * @example - * // es6 - * import RegionsPlugin from 'wavesurfer.regions.js'; - * - * // commonjs - * var RegionsPlugin = require('wavesurfer.regions.js'); - * - * // if you are using " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - "b950f7fe8ea34776bde01ed2f4244f5f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving sample.wav to sample.wav\n" + ] }, - "f7b175bb312c4f43809c97f98e6e84a3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import google.colab\n", + "own_file, _ = google.colab.files.upload().popitem()\n", + "OWN_FILE = {'audio': own_file}\n", + "notebook.reset()\n", + "\n", + "# load audio waveform and play it\n", + "waveform, sample_rate = Audio(mono=\"downmix\")(OWN_FILE)\n", + "IPythonAudio(data=waveform.squeeze(), rate=sample_rate, autoplay=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ctw4nLaPYnp_" + }, + "source": [ + "Simply replace `DEMO_FILE` by `OWN_FILE` in the rest of the notebook.\n", + "\n", + "Note, however, that unless you provide a groundtruth annotation in the next cell, you will (obviously) not be able to visualize groundtruth annotation nor evaluate the performance of the diarization pipeline quantitatively" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x9AQgDzFYnp_" + }, + "source": [ + "## Upload groundtruth (optional)\n", + "\n", + "The groundtruth file is expected to use the RTTM format, with one line per speech turn with the following convention:\n", + "\n", + "```\n", + "SPEAKER {file_name} 1 {start_time} {duration} {speaker_name} \n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iZaFudpDYnp_", + "outputId": "981274fa-e654-4091-c838-91c81f921e5d", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. 
Please rerun this cell to enable.\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, - "e0c6b8aff2ad4849b3dac392ee388112": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving sample.rttm to sample.rttm\n" + ] }, - "8ddd76365cfd4408b8ca12309b183967": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABHQAAACsCAYAAAAaLvvnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOHUlEQVR4nO3de6ykZ10H8O+v3YIGCghbG1yqC+WiBWwpa9OKJk2DbQUVURRISCDyhxowXNQEFOzWqEnBtl4AjQVCDYSLgFpBqA1ZBJWCp1As5aJtbFPWUkStbVHLpT//mJdwaLuX2Z1zZp6zn08yOe95b/ObeeeZ951vnmemujsAAAAAjOOoZRcAAAAAwHwEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoTKrqeVX1msPY/uSq+khVXVNVf1VVD1i37OVVdV1Vfa6qzllMxVvbRh2PqnpIVe2pqjsOZ/8AAACwTAKdBaiqo5O8PsnLuvvxSf48ya9Oy05K8qwkj01ybpLXTeuzQfZ3PJL8X5JXJvmVJZUHAAAAh22oQKeq7ldV762qT1bVp6rqmVV1Q1W9auqJ8bGqeuS07nFV9a6q+sfp9qRp/mlTz41PVNU/VNVj7uV+njqts72qzp6mP15Vf1ZV95/WuaGqLqiqjyf5mSSPTvKhaRdXJPnpafppSd7W3Xd2978muS7JaRv6RG2SEY9Hd3+5u/8us2AHAAAAhjRUoJNZD5d/6+6Tu/txSd4/zf/vqSfGa5L83jTv95Nc3N0/kNmH+ddP8z+b5Ie7+wlJfiPJ76y/g6p6epKXJXnKNOsVSZ7c3acmWUvy0nWr/0d3n9rdb0tybWbhTTILFE6YpnckuWndNp+f5m0FIx4PAAAAGN62w9l4744Tdic5bzGlJEnO37H3pt37WX5Nkgur6oIk7+nuD1dVkrx1Wv7WJBdP009OctK0PEkeMPXmeGCSS6vqUUk6yTHr9n9Wkl1Jzu7u26rqx5KclOTvp/3cJ8lH1q3/9nXTP5fkD6rqlUkuS/KVg37UC3L6eZfvzoKPx5Xnn7N7P8sdDwAAAFiCwwp0Nlt3/3NVnZpZb43fqqoPfGPR+tWmv0clOb27v2VozfRFuHu6++lVtTPJB9ctvj7JIzIbrrOWpJJc0d3P3kdJX15X22eTnD3dx6OTPHVatDff2jvkYdO84Q16PAAAAGB4Qw25qqrvSvI/3f3mJK9Ocuq06Jnr/n6jx8bfJPmlddueMk0+MN8MVJ53t7u4MbPhQH9aVY9NcmWSJ637Hpj7TeHAvdX2ndPfozIbFvTH06LLkjyrqu5bVQ9P8qgkH5vjYa+sQY8HAAAADK+6+8BrrYjpJ79fneSuJF9N8otJ3pnZUJsfTXJnkmd393VVtT3Ja5N8X2Y9kT7U3b9QVWckuTSz3hzvTfKc7t5ZVc9Lsqu7X1hVT0jyliQ/nuR7klyQ5L5TGa/o7suq6oZp/S9Ntb0oyQumdd6d5OU9PblV9euZDQH6WpIXd/f7NuQJ2mQDH48bkjwgsyFbt2Y2pOvTG/AUAQAAwIYYKtC5N3f/IM9yOR4AAACw8YYacgUAAADAFuihAwAAAHCk0UMHAAAAYDACHQAAAIDBCHQAAAAABrNtnpW3b9/eO3fu3KBSAAAAAI48V1111Ze6+7h5tpkr0Nm5c2fW1tbmqwoAAACAfaqqG+fdxpArAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAYjEAHAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAYjEAHAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAAB
gMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwcwV6Hz9llsWeue3XXjRQveXJJfsuW7h+1yEVa1rFW3F52orPibG53XJKtnf63EjrhfgUB3q69F7LnCk8z64eHMFOnctONC5/aKLF7q/JHnDB69f+D4XYVXrWkVb8bnaio+J8Xldskr293rciOsFOFSH+nr0ngsc6bwPLp4hVwAAAACDEegAAAAADGbbvBvs3XHCRtSxUKefd/myS+AwOYawObQ1RjHC9QcciPdcABZJDx0AAACAwQh0AAAAAAYz95CrHXtvWtidb1T36SvPP2dD9ns4dLGdzyoew8Ph+LOqtlpbY1wHep9c5PUHHI7DuX71ngscyXwmWjw9dAAAAAAGI9ABAAAAGIxABwAAAGAwcwU6Rx1//ELv/NiXvmSh+0uS55954sL3uQirWtcq2orP1VZ8TIzP65JVsr/X40ZcL8ChOtTXo/dc4EjnfXDxqrsPeuVdu3b12traBpYDAAAAcGSpqqu6e9c82xhyBQAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEehsgtsuvGjZJQxvs55DxwruSbtg1Vyy57oh9gkAq8Z13dYi0NkEt1908bJLGN5mPYeOFdyTdsGqecMHrx9inwCwalzXbS0CHQAAAIDBCHQAAAAABiPQAQAAABjMtmUXcKTYu+OEZZfAQXKsAFbf6eddvuwSAGBIPu9sHXroAAAAAAxGoAMAAAAwGEOuNsmOvTctu4ShbWa3QMcKvpVuuayiK88/Z6H7M4QLgCOFzzsrqmruTfTQAQAAABiMQAcAAABgMAKdTXDsS1+y7BKGt1nPoWMF96RdsGqef+aJQ+wTAFaN67qtpbr7oFfetWtXr62tbWA5AAAAAEeWqrqqu3fNs40eOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIOp7j74lav+PcmNG1cO3KvtSb607CKAQ6L9wpi0XRiTtgvjekx3HzvPBtvmWbm7j5uvHjh8VbXW3buWXQcwP+0XxqTtwpi0XRhXVa3Nu40hVwAAAACDEegAAAAADEagwwj+ZNkFAIdM+4UxabswJm0XxjV3+53rS5EBAAAAWD49dAAAAAAGI9BhpVTVG6vqi1X1qXXzHlxVV1TVv0x/v2OZNQL3tI+2u7uq9lbV1dPtKcusEbinqjqhqvZU1aer6tqqetE037kXVtx+2q/zL6ywqvq2qvpYVX1yarvnT/MfXlUfrarrqurtVXWfA+1LoMOqeVOSc+8272VJPtDdj0rygel/YLW8Kfdsu0lycXefMt3+epNrAg7sa0l+ubtPSnJ6khdU1Ulx7oUR7Kv9Js6/sMruTHJWd5+c5JQk51bV6UkuyKztPjLJfyV5/oF2JNBhpXT3h5L8591mPy3JpdP0pUl+clOLAg5oH20XWHHdfXN3f3yavj3JZ5LsiHMvrLz9tF9ghfXMHdO/x0y3TnJWkndO8w/q3CvQYQTHd/fN0/QXkhy/zGKAubywqv5pGpJlyAassKrameQJST4a514Yyt3ab+L8Cyutqo6uqqu
TfDHJFUmuT3Jrd39tWuXzOYiAVqDDUHr2s2x+mg3G8EdJTsysK+nNSS5cbjnAvlTV/ZO8K8mLu/u29cuce2G13Uv7df6FFdfdX+/uU5I8LMlpSb73UPYj0GEEt1TVQ5Nk+vvFJdcDHITuvmU6Wd2V5JLMTlbAiqmqYzL7MPiW7n73NNu5FwZwb+3X+RfG0d23JtmT5IwkD6qqbdOihyXZe6DtBTqM4LIkz52mn5vkL5dYC3CQvvFhcPL0JJ/a17rAclRVJXlDks9090XrFjn3worbV/t1/oXVVlXHVdWDpulvT/IjmX0H1p4kz5hWO6hzb8160cJqqKq3JjkzyfYktyQ5L8lfJHlHku9OcmOSn+1uX74KK2QfbffMzLp7d5Ibkvz8uu/kAFZAVf1Qkg8nuSbJXdPsX8vsezice2GF7af9PjvOv7Cyqur7M/vS46Mz62Tzju7+zap6RJK3JXlwkk8keU5337nffQl0AAAAAMZiyBUAAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAwMqrqodU1dXT7QtVtXeavqOqXrfs+gAANpufLQcAhlJVu5Pc0d2/u+xaAACWRQ8dAGBYVXVmVb1nmt5dVZdW1Yer6saq+qmqelVVXVNV76+qY6b1nlhVf1tVV1XV5VX10OU+CgCA+Ql0AICt5MQkZyX5iSRvTrKnux+f5H+TPHUKdf4wyTO6+4lJ3pjkt5dVLADAodq27AIAABbofd391aq6JsnRSd4/zb8myc4kj0nyuCRXVFWmdW5eQp0AAIdFoAMAbCV3Jkl331VVX+1vflngXZld91SSa7v7jGUVCACwCIZcAQBHks8lOa6qzkiSqjqmqh675JoAAOYm0AEAjhjd/ZUkz0hyQVV9MsnVSX5wuVUBAMzPz5YDAAAADEYPHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAw/w9yi/xWuRzNKQAAAABJRU5ErkJggg==", + "text/plain": [ + "" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "groundtruth_rttm, _ = google.colab.files.upload().popitem()\n", + "groundtruths = load_rttm(groundtruth_rttm)\n", + "if OWN_FILE['audio'] in groundtruths:\n", + " groundtruth = groundtruths[OWN_FILE['audio']]\n", + "else:\n", + " _, groundtruth = groundtruths.popitem()\n", + "groundtruth" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "5MclWK2GYnp_" + }, + "source": [ + "# Speaker diarization with `pyannote.pipeline`\n", + "\n", + "We are about to run a full speaker diarization pipeline, that includes speaker segmentation, speaker embedding, and a final clustering step. 
**Brace yourself!**\n", + "\n", + "To load the speaker diarization pipeline, \n", + "\n", + "* accept the user conditions on [hf.co/pyannote/speaker-diarization](https://hf.co/pyannote/speaker-diarization)\n", + "* accept the user conditions on [hf.co/pyannote/segmentation](https://hf.co/pyannote/segmentation)\n", + "* login using `notebook_login` below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 301, + "referenced_widgets": [ + "c8731777ce834e58a76a295076200cfc", + "859b12a6d95b4c6f987791ca848122b9", + "94756148d2e94a93ae233baba20af683", + "ba18cded436e486da34882d821d8f1eb", + "99898e6ee64a46bd832af112e79b58b7", + "79184c8c2a6f4b7493bb7f6983f18a09", + "ea95ffd922c0455d957120f034e541f8", + "13525aa369a9410a83343952ab511f3c", + "b2be65e192384c948fb8987d4cfca505", + "333b42ca7aa44788b1c22724eb11bcc3", + "0e382d66f09f4958a40baa7ab83c4ccb", + "6a45ce374e2e47ba9457d02e02522748", + "765485a1d3f941d28b79782dcffbf401", + "3499ef4dd9f243d9bef00b396e78ed69" + ] }, - "df8dc69ca1cf4e57bfebf4762c0460f0": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": 
null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + "id": "r5u7VMb-YnqB", + "outputId": "c714a997-d4f8-417a-e5ad-0a4924333859", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Login successful\n", + "Your token has been saved to /root/.huggingface/token\n" + ] + } + ], + "source": [ + "from huggingface_hub import notebook_login\n", + "notebook_login()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 273, + "referenced_widgets": [ + "183c55d5d3ce4058ae338c81344547c5", + "70efa83bf3ea45b4bd8cc41f57613328", + "338747810ac74b4e83e356a01459c8a5", + "ac0bcfa1ef6e4e78a7769c4cb2e8762f", + "6efb7939bb954dc8ba116680139eb257", + "6242493d251a47609c0c44f1dbe82958", + "f439c1de68ac4c799d81fdb29d053d10", + "e4c1e9affaba4045a3ec903091b6f454", + "1946386483ed4947a2184cdb4ea6e434", + "549a30c85c47466eadedbd24da42e304", + "bedc7d916b9745f097094c5c51a81f06", + "d12f07e25bf5422facc38c3463700994", + "eae11f84c2644ada8295b445c924baec", + "bcf766d2a2c641f0aa2af596c7da1b18", + "74bf69aa6eaa4a8594b2ea9a0fb20957", + "2d7a0b901d7044d5b1f273a3e9bea560", + "2cbf0faadd4842c8b22e10541ff9de4e", + "ab32c7daa1d9404fb921f39fbc4fc05c", + "ee537ee5470f4d7b816a8c8f96948b4d", + "652e97509a914f3b914665c4889c6d11", + "ebc9801e164a44b3b6f8dc7f590e1c79", + "0821b47ae70444dfa38b84719c4836a6", + "c3358d32ac814ea6bc5714402c5bc62d", + "ecd8e5e364d34ea8bfbba4fbd467384d", + "0125df9fa8e14b3db0e2bce299529812", + "e3169ca885e04536a709d5751173ce9a", + "70abdfd99be84f7b9b8d24fee9eec022", + "554e567a83b348f88092c6ba01830930", + "6e334cad2e94462cae6e722bd6f11a9e", + 
"407e250e244b4985b1ce8c9d32a8af7d", + "8127c4258e374ad986ce1f8b4c70f704", + "358c3a67f8b54c4c899e095611fa116b", + "e1c9df12fa034c93a9b3530ea4a7c5aa", + "404f7ce06a01470fbb0b747981d00e84", + "38b3054ad59549e4b4f2de4697139a87", + "7d90af87c9574f5ca21fca058c39bf02", + "fee75343289f42fb8d6dfb4bf26fe368", + "f21c0c6379d74898ac6aadcb6fc14a8a", + "0adb304bf90f4079a4031caea1cfb924", + "40021e0b59fe4e1e9bac351dbec57c6c", + "ed169fd606274f2ebbb3e8f32ab42431", + "304e9682570b4abeb1719001c04449d6", + "16c0017f65b649f5ac5bebf1c955a1fd", + "5e2c207db5424f91829bf5c52040a9f2", + "8011d68253ac4080a637659ef3383dc4", + "e928540e99564d808cb2d12c92daa498", + "fc9a3c4ae0a947ec91a227360a80f602", + "f91dcd9f30c743d69f9d4b7e8d1beba5", + "6ede83f870a24e71b5182fcc458cdc42", + "c9974003727a401797953ef2885db5a2", + "77a361d1ff214e8799891bbeb28a0789", + "27f6f437c5264368bc2c679942ad1e53", + "e7728d9c55e44274966f8f6dbc445c54", + "2b2d7912186a49dd9891ae12c77482c7", + "1600b9cd09c446e581b7912e35c9f56e", + "28004251b0e44a6c9dfa7ce1b30dcb18", + "e98cf7a63c814ffd94f69928f0700ebf", + "6a4dee55cbae4959bd7fe3c4d92242b1", + "8dba487876124827919079519406ecb8", + "5c211704f90946afbae2f66a7586ce70", + "aba21021d3bb4565a58ffa40049810db", + "f7812fa7fbf744c1b261b985d085e28e", + "d7071582bfbe4ec4b2c3c9843e5481ae", + "0d80273cabbc42ba9a408fb1144151c9", + "67fcc38a1e5d4eb39381685447e397de", + "0b4bf8076fdf4d19843a3246c8bd61ac", + "d182e37b4a404158bee8446fc2728bd9", + "603e99f45afb4910a99f7684ffd21b6a", + "d13ba6030aff42bca48c72ff071c44c0", + "a899f4bc6ed842d397723cca582669e6", + "a02030ba8f324d93a7ed6cc793d70a3b", + "b26354d0278f447d92c7e1ad4c211d64", + "3bd33a372aad4c438f64d73c97f14c6a", + "c8e0c9a60ef34d2caee9d55a3c21c3d4", + "764aa53d75324d73ab06936c52fd8fc8", + "341615c971b04033b7293d82fc40f35c", + "17856a72e4e948039a66c51e8244cb50", + "41eb32a6fef141ff9cc3ce6e4d771822", + "0d10fb0edc9144b1a1fc1f2c9e322410", + "32accb0adfa24c62a75c15c8ec88df8c", + "bf299285318b4a04a88569cc581ecd75", + 
"ac2950d08fc145ba9eb9cf5824b1ee18", + "d33fba0d78fb41f983c55f5cd2a0a740", + "fd47487fc8734594823f8afa00c4239d", + "23d4e25ec6c541818d5927b69576d278", + "54d9456703324160aced03ee5fef2943", + "bacfb50c001047c4824a05c9f2ee2e40", + "c53a1cf68fcd4388abf1f0379891089a" + ] }, - "799487c4c6de471c827f120f1c11d9e2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + "id": "lUq1UvoJYnqB", + "outputId": "8c052808-d0b2-4f2e-8771-f86114ae3fe3", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "183c55d5d3ce4058ae338c81344547c5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading: 0%| | 0.00/500 [00:00" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diarization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DLhErS6wYnqB" + }, + "source": [ + "# Evaluation with `pyannote.metrics`\n", + "\n", + "Because groundtruth is available, we can evaluate the quality of the diarization pipeline by computing the [diarization error rate](http://pyannote.github.io/pyannote-metrics/reference.html#diarization)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vNHQRTUIYnqB", + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "from pyannote.metrics.diarization import DiarizationErrorRate\n", + "metric = DiarizationErrorRate()\n", + "der = metric(groundtruth, diarization)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9d0vKQ0fYnqB", + "outputId": "9a664753-cd84-4211-9153-d33e929bb252", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "diarization error rate = 19.2%\n" + ] + } + ], + "source": [ + "print(f'diarization error rate = {100 * der:.1f}%')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xz5QJV9nYnqB" + }, + "source": [ + "This implementation of diarization error rate is brought to you by [`pyannote.metrics`](http://pyannote.github.io/pyannote-metrics/).\n", + "\n", + "It can also be used to improve visualization by find the optimal one-to-one mapping between groundtruth and hypothesized speakers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 230 + }, + "id": "xMLf4mrYYnqB", + "outputId": "ed08bcc8-24c6-439c-a244-3a673ff480b0", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping = metric.optimal_mapping(groundtruth, diarization)\n", + "diarization.rename_labels(mapping=mapping)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 230 }, - "4ea0432a604e4ac197999debb131c557": { + "id": "Z0ewsLlQYnqB", + "outputId": "8a8cd040-ee1d-48f7-d4be-eef9e08e9e55", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "groundtruth" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MxlrTbyPYnqB" + }, + "source": [ + "# Going further \n", + "\n", + "We have only scratched the surface in this introduction. 
\n", + "\n", + "More details can be found in the [`pyannote.audio` Github repository](https://github.com/pyannote/pyannote-audio).\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "0125df9fa8e14b3db0e2bce299529812": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", + "_view_name": "ProgressView", + "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_5749dba1be9b4eaaad17928a90ea411c", - "placeholder": "​", - "style": "IPY_MODEL_f0f3451d91bf4c5a9a8e20623a7b528d", - "value": " 17.7M/17.7M [00:00<00:00, 50.2MB/s]" + "layout": "IPY_MODEL_407e250e244b4985b1ce8c9d32a8af7d", + "max": 318, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8127c4258e374ad986ce1f8b4c70f704", + "value": 318 + } + }, + "0821b47ae70444dfa38b84719c4836a6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "21cd4080794342dbb3209df737d4f835": { + "0adb304bf90f4079a4031caea1cfb924": { "model_module": "@jupyter-widgets/base", - 
"model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -502,77 +982,62 @@ "width": null } }, - "5cdd23be0a804fd192b6ccaddcb52964": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "0b4bf8076fdf4d19843a3246c8bd61ac": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_name": "StyleView", + "description_width": "" } }, - "889aca71e695487d9cd31c2dae585a64": { + "0d10fb0edc9144b1a1fc1f2c9e322410": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": 
{ + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d33fba0d78fb41f983c55f5cd2a0a740", + "placeholder": "​", + "style": "IPY_MODEL_fd47487fc8734594823f8afa00c4239d", + "value": "Downloading: 100%" + } + }, + "0d80273cabbc42ba9a408fb1144151c9": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "b9fc636fcc6f4a2785999385cd340ab9": { + "0e382d66f09f4958a40baa7ab83c4ccb": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -621,26 +1086,40 @@ "width": null } }, - "ebf9284b6e514698b8c7389c758f7520": { + "13525aa369a9410a83343952ab511f3c": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1600b9cd09c446e581b7912e35c9f56e": { 
+ "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "5749dba1be9b4eaaad17928a90ea411c": { + "16c0017f65b649f5ac5bebf1c955a1fd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -689,10 +1168,10 @@ "width": null } }, - "f0f3451d91bf4c5a9a8e20623a7b528d": { + "17856a72e4e948039a66c51e8244cb50": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -704,10 +1183,10 @@ "description_width": "" } }, - "0f8ee54b3031408da0281f7aa98eff25": { + "183c55d5d3ce4058ae338c81344547c5": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -719,83 +1198,33 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_ff89e65846414100b84b1f9cf8b70fd9", - "IPY_MODEL_a950bac54c5742e79cd2de4fda29c2e0", - "IPY_MODEL_3f5459fa25654a76bfec64d1dcb542b6" + "IPY_MODEL_70efa83bf3ea45b4bd8cc41f57613328", + "IPY_MODEL_338747810ac74b4e83e356a01459c8a5", + "IPY_MODEL_ac0bcfa1ef6e4e78a7769c4cb2e8762f" ], - "layout": "IPY_MODEL_fa10eb9e43fd43a49934cdb4301aefff" - } - }, - "ff89e65846414100b84b1f9cf8b70fd9": { - "model_module": "@jupyter-widgets/controls", - "model_name": 
"HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_547cbb166dba40d1b626bd204ff3cf96", - "placeholder": "​", - "style": "IPY_MODEL_a2aba4e8152c4fbda91bf59a42f25604", - "value": "Downloading: 100%" - } - }, - "a950bac54c5742e79cd2de4fda29c2e0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_69a8212dded54f79adf3f8b60b9eb000", - "max": 318, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_25142cfd61084c128fe39adad5016109", - "value": 318 + "layout": "IPY_MODEL_6efb7939bb954dc8ba116680139eb257" } }, - "3f5459fa25654a76bfec64d1dcb542b6": { + "1946386483ed4947a2184cdb4ea6e434": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7e64327fa6e948edae034c8d52bb96b4", - 
"placeholder": "​", - "style": "IPY_MODEL_32895abb57a7426aa089230aa49cfff2", - "value": " 318/318 [00:00<00:00, 9.69kB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "fa10eb9e43fd43a49934cdb4301aefff": { + "23d4e25ec6c541818d5927b69576d278": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -844,10 +1273,10 @@ "width": null } }, - "547cbb166dba40d1b626bd204ff3cf96": { + "27f6f437c5264368bc2c679942ad1e53": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -896,25 +1325,32 @@ "width": null } }, - "a2aba4e8152c4fbda91bf59a42f25604": { + "28004251b0e44a6c9dfa7ce1b30dcb18": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e98cf7a63c814ffd94f69928f0700ebf", + "IPY_MODEL_6a4dee55cbae4959bd7fe3c4d92242b1", + "IPY_MODEL_8dba487876124827919079519406ecb8" + ], + "layout": "IPY_MODEL_5c211704f90946afbae2f66a7586ce70" } }, - "69a8212dded54f79adf3f8b60b9eb000": { + "2b2d7912186a49dd9891ae12c77482c7": { "model_module": 
"@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -963,26 +1399,62 @@ "width": null } }, - "25142cfd61084c128fe39adad5016109": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", + "2cbf0faadd4842c8b22e10541ff9de4e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "7e64327fa6e948edae034c8d52bb96b4": { + "2d7a0b901d7044d5b1f273a3e9bea560": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1031,68 +1503,65 @@ "width": null } }, - "32895abb57a7426aa089230aa49cfff2": { + "304e9682570b4abeb1719001c04449d6": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "747aa6596d1a4b04a6f77e38179776ad": { + "32accb0adfa24c62a75c15c8ec88df8c": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_e65942656e2347eba0a3fe3ae872159b", - "IPY_MODEL_6e5c84bfa614482b960be701f92ee22c", - "IPY_MODEL_def1205f95274d3db0fafc9c8dd1c913" - ], - "layout": "IPY_MODEL_46a04e196f9e461bb43e541270022b8d" + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_23d4e25ec6c541818d5927b69576d278", + "max": 128619, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_54d9456703324160aced03ee5fef2943", + "value": 128619 } }, - "e65942656e2347eba0a3fe3ae872159b": { + "333b42ca7aa44788b1c22724eb11bcc3": { "model_module": "@jupyter-widgets/controls", - "model_name": 
"HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_18a02eecf49746cc972882c2cca9ded2", - "placeholder": "​", - "style": "IPY_MODEL_31e65957ea204c1084119e3e39528832", - "value": "Downloading: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "6e5c84bfa614482b960be701f92ee22c": { + "338747810ac74b4e83e356a01459c8a5": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1105,39 +1574,18 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_aa6ccf1390cc4139b7ba23ea7129b273", - "max": 1920, + "layout": "IPY_MODEL_e4c1e9affaba4045a3ec903091b6f454", + "max": 500, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_264cde3fae9d442abdb74b4d45288be3", - "value": 1920 + "style": "IPY_MODEL_1946386483ed4947a2184cdb4ea6e434", + "value": 500 } }, - "def1205f95274d3db0fafc9c8dd1c913": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_2c28a61cfa474c7498a8381e74639530", - "placeholder": "​", - "style": "IPY_MODEL_5895a4011e3143a1931566822a65fd50", - "value": " 1.92k/1.92k [00:00<00:00, 61.7kB/s]" - } - }, - "46a04e196f9e461bb43e541270022b8d": { + "341615c971b04033b7293d82fc40f35c": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1186,10 +1634,25 @@ "width": null } }, - "18a02eecf49746cc972882c2cca9ded2": { + "3499ef4dd9f243d9bef00b396e78ed69": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "358c3a67f8b54c4c899e095611fa116b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1238,10 +1701,46 @@ "width": null } }, - "31e65957ea204c1084119e3e39528832": { + "38b3054ad59549e4b4f2de4697139a87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0adb304bf90f4079a4031caea1cfb924", + "placeholder": "​", + "style": 
"IPY_MODEL_40021e0b59fe4e1e9bac351dbec57c6c", + "value": "Downloading: 100%" + } + }, + "3bd33a372aad4c438f64d73c97f14c6a": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "40021e0b59fe4e1e9bac351dbec57c6c": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1253,10 +1752,32 @@ "description_width": "" } }, - "aa6ccf1390cc4139b7ba23ea7129b273": { + "404f7ce06a01470fbb0b747981d00e84": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_38b3054ad59549e4b4f2de4697139a87", + "IPY_MODEL_7d90af87c9574f5ca21fca058c39bf02", + "IPY_MODEL_fee75343289f42fb8d6dfb4bf26fe368" + ], + "layout": "IPY_MODEL_f21c0c6379d74898ac6aadcb6fc14a8a" + } + }, + "407e250e244b4985b1ce8c9d32a8af7d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1305,26 +1826,32 @@ "width": null } }, - "264cde3fae9d442abdb74b4d45288be3": { + "41eb32a6fef141ff9cc3ce6e4d771822": { "model_module": 
"@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HBoxModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0d10fb0edc9144b1a1fc1f2c9e322410", + "IPY_MODEL_32accb0adfa24c62a75c15c8ec88df8c", + "IPY_MODEL_bf299285318b4a04a88569cc581ecd75" + ], + "layout": "IPY_MODEL_ac2950d08fc145ba9eb9cf5824b1ee18" } }, - "2c28a61cfa474c7498a8381e74639530": { + "549a30c85c47466eadedbd24da42e304": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1373,113 +1900,26 @@ "width": null } }, - "5895a4011e3143a1931566822a65fd50": { + "54d9456703324160aced03ee5fef2943": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "43ada2ceb8ad44d7b8c1a3a1d0aefe2d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_42b71601aeef4271ac150288a78bf66a", - "IPY_MODEL_300b503e606147bb8bc9ff7a98be0bac", - "IPY_MODEL_dcce36fe0b0946aaa484ae1f5f7eb6da" - ], - "layout": "IPY_MODEL_5beb3def5ae64f05b3ceb26c611dcb79" - } - }, - "42b71601aeef4271ac150288a78bf66a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_030181ab29c149dfaf4940ea2c713190", - "placeholder": "​", - "style": "IPY_MODEL_cdec3493e32e49a1abc5f3c6783a49d9", - "value": "Downloading: 100%" - } - }, - "300b503e606147bb8bc9ff7a98be0bac": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b0efbf56235f4f9f866137c85ea2e189", - "max": 83316686, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_12bee4747bc74073bf4cbfd36f0445cb", - "value": 83316686 - } - }, - "dcce36fe0b0946aaa484ae1f5f7eb6da": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - 
"model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cb0fc23346fa4b638c3cce496ac6cd70", - "placeholder": "​", - "style": "IPY_MODEL_0200715daa5e4f4e8f7839d2ea579f19", - "value": " 83.3M/83.3M [00:03<00:00, 35.1MB/s]" - } - }, - "5beb3def5ae64f05b3ceb26c611dcb79": { + "554e567a83b348f88092c6ba01830930": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1528,10 +1968,10 @@ "width": null } }, - "030181ab29c149dfaf4940ea2c713190": { + "5c211704f90946afbae2f66a7586ce70": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1580,10 +2020,10 @@ "width": null } }, - "cdec3493e32e49a1abc5f3c6783a49d9": { + "5e2c207db5424f91829bf5c52040a9f2": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1595,10 +2035,31 @@ "description_width": "" } }, - "b0efbf56235f4f9f866137c85ea2e189": { + "603e99f45afb4910a99f7684ffd21b6a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b26354d0278f447d92c7e1ad4c211d64", + "placeholder": "​", + "style": "IPY_MODEL_3bd33a372aad4c438f64d73c97f14c6a", + "value": "Downloading: 100%" + } + }, + "6242493d251a47609c0c44f1dbe82958": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1647,10 +2108,10 @@ "width": null } }, - "12bee4747bc74073bf4cbfd36f0445cb": { + "652e97509a914f3b914665c4889c6d11": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1663,10 +2124,10 @@ "description_width": "" } }, - "cb0fc23346fa4b638c3cce496ac6cd70": { + "67fcc38a1e5d4eb39381685447e397de": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1715,68 +2176,26 @@ "width": null } }, - "0200715daa5e4f4e8f7839d2ea579f19": { + "6a45ce374e2e47ba9457d02e02522748": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ButtonStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ButtonStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "description_width": "" - } - }, - "892182c8c97d426d9acf5467d1354d32": { - "model_module": 
"@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_32acc8ab3c5643d7a4c3595fd67da269", - "IPY_MODEL_3c3189d90a404112ac7f6081acab7e61", - "IPY_MODEL_34e0fb65b69d470099506bd24362c52c" - ], - "layout": "IPY_MODEL_c697909d569d40918acd835108e29ae7" + "button_color": null, + "font_weight": "" } }, - "32acc8ab3c5643d7a4c3595fd67da269": { + "6a4dee55cbae4959bd7fe3c4d92242b1": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d690ed1451e74799a2c4c265cd562ed9", - "placeholder": "​", - "style": "IPY_MODEL_f89679f445424ec1ba2203fb8f7753de", - "value": "Downloading: 100%" - } - }, - "3c3189d90a404112ac7f6081acab7e61": { - "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1789,39 +2208,33 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_735b5f02606d4c0288f70469a8ab2902", + "layout": "IPY_MODEL_d7071582bfbe4ec4b2c3c9843e5481ae", "max": 1921, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_0308a7a44b8b4a9cae7bda21fbad99f5", + "style": "IPY_MODEL_0d80273cabbc42ba9a408fb1144151c9", "value": 1921 } }, - 
"34e0fb65b69d470099506bd24362c52c": { + "6e334cad2e94462cae6e722bd6f11a9e": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_967a5451e89049b4ac4688338495b50e", - "placeholder": "​", - "style": "IPY_MODEL_bb655ffda271431892bba5ebab9fd93b", - "value": " 1.92k/1.92k [00:00<00:00, 50.8kB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "c697909d569d40918acd835108e29ae7": { + "6ede83f870a24e71b5182fcc458cdc42": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1870,10 +2283,62 @@ "width": null } }, - "d690ed1451e74799a2c4c265cd562ed9": { + "6efb7939bb954dc8ba116680139eb257": { "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "70abdfd99be84f7b9b8d24fee9eec022": { + "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1922,25 +2387,68 @@ "width": null } }, - "f89679f445424ec1ba2203fb8f7753de": { + "70efa83bf3ea45b4bd8cc41f57613328": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6242493d251a47609c0c44f1dbe82958", + "placeholder": "​", + "style": "IPY_MODEL_f439c1de68ac4c799d81fdb29d053d10", + "value": "Downloading: 100%" + } + }, + "74bf69aa6eaa4a8594b2ea9a0fb20957": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ebc9801e164a44b3b6f8dc7f590e1c79", + "placeholder": "​", + "style": "IPY_MODEL_0821b47ae70444dfa38b84719c4836a6", + "value": " 17.7M/17.7M [00:00<00:00, 54.3MB/s]" + } + }, + "764aa53d75324d73ab06936c52fd8fc8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "735b5f02606d4c0288f70469a8ab2902": { + "765485a1d3f941d28b79782dcffbf401": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1989,26 +2497,25 @@ "width": null } }, - "0308a7a44b8b4a9cae7bda21fbad99f5": { + "77a361d1ff214e8799891bbeb28a0789": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "967a5451e89049b4ac4688338495b50e": { + "79184c8c2a6f4b7493bb7f6983f18a09": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2018,13 
+2525,13 @@ "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, - "align_items": null, + "align_items": "center", "align_self": null, "border": null, "bottom": null, - "display": null, + "display": "flex", "flex": null, - "flex_flow": null, + "flex_flow": "column", "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, @@ -2054,28 +2561,37 @@ "right": null, "top": null, "visibility": null, - "width": null + "width": "50%" } }, - "bb655ffda271431892bba5ebab9fd93b": { + "7d90af87c9574f5ca21fca058c39bf02": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ed169fd606274f2ebbb3e8f32ab42431", + "max": 1920, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_304e9682570b4abeb1719001c04449d6", + "value": 1920 } }, - "f7626a7dae2a421ba31a578746be491d": { + "8011d68253ac4080a637659ef3383dc4": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2087,17 +2603,33 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_97d03beea7cb4e479e1ee67edfa5f883", - "IPY_MODEL_00751b0ca24b47c1b7d337a5b17fc6c1", - "IPY_MODEL_9fc668b6710c449e898d4b8401fe974c" + 
"IPY_MODEL_e928540e99564d808cb2d12c92daa498", + "IPY_MODEL_fc9a3c4ae0a947ec91a227360a80f602", + "IPY_MODEL_f91dcd9f30c743d69f9d4b7e8d1beba5" ], - "layout": "IPY_MODEL_d0ea1fb45fe24bc3b549acc4e05860cd" + "layout": "IPY_MODEL_6ede83f870a24e71b5182fcc458cdc42" } }, - "97d03beea7cb4e479e1ee67edfa5f883": { + "8127c4258e374ad986ce1f8b4c70f704": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "859b12a6d95b4c6f987791ca848122b9": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2109,40 +2641,60 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_c15d7b4b314b4fb1beb5e8b001114392", + "layout": "IPY_MODEL_ea95ffd922c0455d957120f034e541f8", "placeholder": "​", - "style": "IPY_MODEL_6f73694403ae46538b7280ab31657ebc", - "value": "Downloading: 100%" + "style": "IPY_MODEL_13525aa369a9410a83343952ab511f3c", + "value": "

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
" } }, - "00751b0ca24b47c1b7d337a5b17fc6c1": { + "8dba487876124827919079519406ecb8": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", + "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_f33d2ce982144ced87a7a834fe83bdb8", - "max": 5534328, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_7693042eb27e4994b86ed1afb293be56", - "value": 5534328 + "layout": "IPY_MODEL_67fcc38a1e5d4eb39381685447e397de", + "placeholder": "​", + "style": "IPY_MODEL_0b4bf8076fdf4d19843a3246c8bd61ac", + "value": " 1.92k/1.92k [00:00<00:00, 63.2kB/s]" } }, - "9fc668b6710c449e898d4b8401fe974c": { + "94756148d2e94a93ae233baba20af683": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "PasswordModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "PasswordModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "PasswordView", + "continuous_update": true, + "description": "Token:", + "description_tooltip": null, + "disabled": false, + "layout": "IPY_MODEL_b2be65e192384c948fb8987d4cfca505", + "placeholder": "​", + "style": "IPY_MODEL_333b42ca7aa44788b1c22724eb11bcc3", + "value": "" + } + }, + "99898e6ee64a46bd832af112e79b58b7": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", @@ -2154,16 +2706,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_fd6ccdba0a2746419e744035e1096bf2", + "layout": "IPY_MODEL_765485a1d3f941d28b79782dcffbf401", "placeholder": "​", - "style": "IPY_MODEL_2dc0b625a4ff4a60abc4fa3c6671c9ab", - "value": " 5.53M/5.53M [00:00<00:00, 28.0MB/s]" + "style": "IPY_MODEL_3499ef4dd9f243d9bef00b396e78ed69", + "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. " } }, - "d0ea1fb45fe24bc3b549acc4e05860cd": { + "a02030ba8f324d93a7ed6cc793d70a3b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2212,10 +2764,46 @@ "width": null } }, - "c15d7b4b314b4fb1beb5e8b001114392": { + "a899f4bc6ed842d397723cca582669e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_341615c971b04033b7293d82fc40f35c", + "placeholder": "​", + "style": "IPY_MODEL_17856a72e4e948039a66c51e8244cb50", + "value": " 5.53M/5.53M [00:00<00:00, 21.7MB/s]" + } + }, + "ab32c7daa1d9404fb921f39fbc4fc05c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "aba21021d3bb4565a58ffa40049810db": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2264,25 +2852,31 @@ "width": null } }, - "6f73694403ae46538b7280ab31657ebc": { + "ac0bcfa1ef6e4e78a7769c4cb2e8762f": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_549a30c85c47466eadedbd24da42e304", + "placeholder": "​", + "style": "IPY_MODEL_bedc7d916b9745f097094c5c51a81f06", + "value": " 500/500 [00:00<00:00, 5.05kB/s]" } }, - "f33d2ce982144ced87a7a834fe83bdb8": { + "ac2950d08fc145ba9eb9cf5824b1ee18": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2331,26 +2925,10 @@ "width": null } }, - "7693042eb27e4994b86ed1afb293be56": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - 
"_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "fd6ccdba0a2746419e744035e1096bf2": { + "b26354d0278f447d92c7e1ad4c211d64": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2399,68 +2977,136 @@ "width": null } }, - "2dc0b625a4ff4a60abc4fa3c6671c9ab": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", + "b2be65e192384c948fb8987d4cfca505": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "0552df02d084406da5b7c51e18fd2cb0": { + "ba18cded436e486da34882d821d8f1eb": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ButtonModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "ButtonModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f61c385e5fa542a98652a434ad4cd324", - "IPY_MODEL_923af70854ad4651bd2d2a8556ab59f6", - "IPY_MODEL_170805a3d7ec457aaacd4d9a0acd09d0" - ], - "layout": "IPY_MODEL_14713e02e5734e46bfb9f9f815699e39" + "_view_name": "ButtonView", + "button_style": "", + "description": "Login", + "disabled": false, + "icon": "", + "layout": "IPY_MODEL_0e382d66f09f4958a40baa7ab83c4ccb", + "style": "IPY_MODEL_6a45ce374e2e47ba9457d02e02522748", + "tooltip": "" } }, - "f61c385e5fa542a98652a434ad4cd324": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", + "bacfb50c001047c4824a05c9f2ee2e40": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bea9fb28848d4c3da10df4fec3dfdb6b", - "placeholder": 
"​", - "style": "IPY_MODEL_a296598d7d3040e8b8b4dab4f7819e91", - "value": "Downloading: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "923af70854ad4651bd2d2a8556ab59f6": { + "bcf766d2a2c641f0aa2af596c7da1b18": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2473,18 +3119,33 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_ab878626da504834b827f1762bcd2c20", - "max": 128619, + "layout": "IPY_MODEL_ee537ee5470f4d7b816a8c8f96948b4d", + "max": 17719103, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_f5a6de62a96340f4ab3094f08e8d54ef", - "value": 128619 + "style": "IPY_MODEL_652e97509a914f3b914665c4889c6d11", + "value": 17719103 } }, - "170805a3d7ec457aaacd4d9a0acd09d0": { + "bedc7d916b9745f097094c5c51a81f06": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + 
"state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "bf299285318b4a04a88569cc581ecd75": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2496,16 +3157,76 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_af1e4e3e52304c51a46eab4b989a3f0b", + "layout": "IPY_MODEL_bacfb50c001047c4824a05c9f2ee2e40", "placeholder": "​", - "style": "IPY_MODEL_b0b8bb8fe0bc47908273136304cf4870", - "value": " 129k/129k [00:00<00:00, 210kB/s]" + "style": "IPY_MODEL_c53a1cf68fcd4388abf1f0379891089a", + "value": " 129k/129k [00:00<00:00, 155kB/s]" + } + }, + "c3358d32ac814ea6bc5714402c5bc62d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ecd8e5e364d34ea8bfbba4fbd467384d", + "IPY_MODEL_0125df9fa8e14b3db0e2bce299529812", + "IPY_MODEL_e3169ca885e04536a709d5751173ce9a" + ], + "layout": "IPY_MODEL_70abdfd99be84f7b9b8d24fee9eec022" + } + }, + "c53a1cf68fcd4388abf1f0379891089a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c8731777ce834e58a76a295076200cfc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_859b12a6d95b4c6f987791ca848122b9", + "IPY_MODEL_94756148d2e94a93ae233baba20af683", + "IPY_MODEL_ba18cded436e486da34882d821d8f1eb", + "IPY_MODEL_99898e6ee64a46bd832af112e79b58b7" + ], + "layout": "IPY_MODEL_79184c8c2a6f4b7493bb7f6983f18a09" } }, - "14713e02e5734e46bfb9f9f815699e39": { + "c8e0c9a60ef34d2caee9d55a3c21c3d4": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2554,10 +3275,10 @@ "width": null } }, - "bea9fb28848d4c3da10df4fec3dfdb6b": { + "c9974003727a401797953ef2885db5a2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2606,25 +3327,78 @@ "width": null } }, - "a296598d7d3040e8b8b4dab4f7819e91": { + "d12f07e25bf5422facc38c3463700994": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HBoxModel", 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_eae11f84c2644ada8295b445c924baec", + "IPY_MODEL_bcf766d2a2c641f0aa2af596c7da1b18", + "IPY_MODEL_74bf69aa6eaa4a8594b2ea9a0fb20957" + ], + "layout": "IPY_MODEL_2d7a0b901d7044d5b1f273a3e9bea560" + } + }, + "d13ba6030aff42bca48c72ff071c44c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c8e0c9a60ef34d2caee9d55a3c21c3d4", + "max": 5534328, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_764aa53d75324d73ab06936c52fd8fc8", + "value": 5534328 + } + }, + "d182e37b4a404158bee8446fc2728bd9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_603e99f45afb4910a99f7684ffd21b6a", + "IPY_MODEL_d13ba6030aff42bca48c72ff071c44c0", + "IPY_MODEL_a899f4bc6ed842d397723cca582669e6" + ], + "layout": "IPY_MODEL_a02030ba8f324d93a7ed6cc793d70a3b" } }, - "ab878626da504834b827f1762bcd2c20": { + 
"d33fba0d78fb41f983c55f5cd2a0a740": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2673,26 +3447,10 @@ "width": null } }, - "f5a6de62a96340f4ab3094f08e8d54ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "af1e4e3e52304c51a46eab4b989a3f0b": { + "d7071582bfbe4ec4b2c3c9843e5481ae": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2741,10 +3499,10 @@ "width": null } }, - "b0b8bb8fe0bc47908273136304cf4870": { + "e1c9df12fa034c93a9b3530ea4a7c5aa": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2755,908 +3513,554 @@ "_view_name": "StyleView", "description_width": "" } - } - } - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nxNf1l8Ye_U9" - }, - "source": [ - "[`pyannote.audio`](https://github.com/pyannote/pyannote-audio) is an open-source toolkit written in Python for **speaker diarization**. 
\n", - "\n", - "Based on [`PyTorch`](https://pytorch.org) machine learning framework, it provides a set of trainable end-to-end neural building blocks that can be combined and jointly optimized to build speaker diarization pipelines. \n", - "\n", - "`pyannote.audio` also comes with pretrained [models](https://huggingface.co/models?other=pyannote-audio-model) and [pipelines](https://huggingface.co/models?other=pyannote-audio-pipeline) covering a wide range of domains for voice activity detection, speaker segmentation, overlapped speech detection, speaker embedding reaching state-of-the-art performance for most of them. \n", - "\n", - "**This notebook will teach you how to apply those pretrained pipelines on your own data.**\n", - "\n", - "Make sure you run it using a GPU (or it might otherwise be slow...)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "42nBTa_QgooG" - }, - "source": [ - "## Installation" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vJGyKTQJqdzq", - "outputId": "28eabcb6-b64b-4e78-9dc0-a181797c3e84", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "source": [ - "# for speechbrain\n", - "!pip install -qq torch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 torchtext==0.12.0\n", - "!pip install -qq speechbrain==0.5.12\n", - "\n", - "# pyannote.audio\n", - "!pip install -qq pyannote.audio\n", - "\n", - "# for visualization purposes\n", - "!pip install -qq moviepy ipython==7.34.0" - ], - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[K |████████████████████████████████| 750.6 MB 12 kB/s \n", - "\u001b[K |████████████████████████████████| 21.0 MB 1.4 MB/s \n", - "\u001b[K |████████████████████████████████| 2.9 MB 49.5 MB/s \n", - "\u001b[K |████████████████████████████████| 10.4 MB 56.5 MB/s \n", - "\u001b[K |████████████████████████████████| 496 kB 34.8 MB/s \n", - "\u001b[K |████████████████████████████████| 101 kB 13.2 MB/s \n", - "\u001b[K 
|████████████████████████████████| 1.2 MB 58.4 MB/s \n", - "\u001b[K |████████████████████████████████| 596 kB 56.7 MB/s \n", - "\u001b[K |████████████████████████████████| 109 kB 75.9 MB/s \n", - "\u001b[K |████████████████████████████████| 546 kB 74.2 MB/s \n", - "\u001b[?25h Building wheel for hyperpyyaml (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[K |████████████████████████████████| 385 kB 30.0 MB/s \n", - "\u001b[K |████████████████████████████████| 110 kB 66.8 MB/s \n", - "\u001b[K |████████████████████████████████| 79 kB 9.1 MB/s \n", - "\u001b[K |████████████████████████████████| 47 kB 5.7 MB/s \n", - "\u001b[K |████████████████████████████████| 41 kB 537 kB/s \n", - "\u001b[K |████████████████████████████████| 129 kB 63.0 MB/s \n", - "\u001b[K |████████████████████████████████| 419 kB 65.9 MB/s \n", - "\u001b[K |████████████████████████████████| 51 kB 201 kB/s \n", - "\u001b[K |████████████████████████████████| 65 kB 5.4 MB/s \n", - "\u001b[K |████████████████████████████████| 585 kB 71.6 MB/s \n", - "\u001b[K |████████████████████████████████| 117 kB 74.6 MB/s \n", - "\u001b[K |████████████████████████████████| 130 kB 64.6 MB/s \n", - "\u001b[K |████████████████████████████████| 308 kB 77.2 MB/s \n", - "\u001b[K |████████████████████████████████| 81 kB 11.4 MB/s \n", - "\u001b[K |████████████████████████████████| 209 kB 75.5 MB/s \n", - "\u001b[K |████████████████████████████████| 140 kB 74.8 MB/s \n", - "\u001b[K |████████████████████████████████| 1.1 MB 64.1 MB/s \n", - "\u001b[K |████████████████████████████████| 58 kB 7.2 MB/s \n", - "\u001b[K |████████████████████████████████| 144 kB 68.4 MB/s \n", - "\u001b[K |████████████████████████████████| 271 kB 68.2 MB/s \n", - "\u001b[K |████████████████████████████████| 94 kB 4.3 MB/s \n", - "\u001b[K |████████████████████████████████| 78 kB 8.5 MB/s \n", - "\u001b[K |████████████████████████████████| 112 kB 68.5 MB/s \n", - "\u001b[K |████████████████████████████████| 147 kB 73.3 MB/s \n", - 
"\u001b[K |████████████████████████████████| 49 kB 6.3 MB/s \n", - "\u001b[?25h Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for julius (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for pyperclip (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[K |████████████████████████████████| 793 kB 22.7 MB/s \n", - "\u001b[K |████████████████████████████████| 381 kB 72.2 MB/s \n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "jupyter-console 5.2.0 requires prompt-toolkit<2.0.0,>=1.0.0, but you have prompt-toolkit 3.0.30 which is incompatible.\n", - "google-colab 1.0.0 requires ipython~=5.5.0, but you have ipython 7.34.0 which is incompatible.\u001b[0m\n", - "\u001b[?25h" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5pSYhjbMhTwT" - }, - "source": [ - "# Visualization with `pyannote.core`\n", - "\n", - "For the purpose of this notebook, we will download and use an audio file coming from the [AMI corpus](http://groups.inf.ed.ac.uk/ami/corpus/), which contains a conversation between 4 people in a meeting room." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "w7YuPI28qlwZ" - }, - "source": [ - "!wget -q http://groups.inf.ed.ac.uk/ami/AMICorpusMirror/amicorpus/ES2004a/audio/ES2004a.Mix-Headset.wav\n", - "DEMO_FILE = {'uri': 'ES2004a.Mix-Headset', 'audio': 'ES2004a.Mix-Headset.wav'}" - ], - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W0_wwm3dj_bO" - }, - "source": [ - "Because AMI is a benchmarking dataset, it comes with manual annotations (a.k.a *groundtruth*). 
\n", - "Let us load and visualize the expected output of the speaker diarization pipeline.\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "1pra1iVnrp5D" - }, - "source": [ - "!wget -q https://raw.githubusercontent.com/pyannote/AMI-diarization-setup/main/only_words/rttms/test/ES2004a.rttm" - ], - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "aj1obYqhr3Qk", - "outputId": "2b1e6b24-c355-4edb-bf8f-46c1bd7f92d2", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 118 - } - }, - "source": [ - "# load groundtruth\n", - "from pyannote.database.util import load_rttm\n", - "_, groundtruth = load_rttm('ES2004a.rttm').popitem()\n", - "\n", - "# visualize groundtruth\n", - "groundtruth" - ], - "execution_count": 6, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/png": "\n" - }, - "metadata": {}, - "execution_count": 6 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JDhKjfUBlsjH" - }, - "source": [ - "For the rest of this notebook, we will only listen to and visualize a one-minute long excerpt of the file (but will process the whole file anyway)." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "YAa_HoA9mnTZ", - "outputId": "6ff35b05-9b2c-49fa-bd4b-b674a44c7118", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 117 - } - }, - "source": [ - "from pyannote.core import Segment, notebook\n", - "# make notebook visualization zoom on 600s < t < 660s time range\n", - "EXCERPT = Segment(600, 660)\n", - "notebook.crop = EXCERPT\n", - "\n", - "# visualize excerpt groundtruth\n", - "groundtruth" - ], - "execution_count": 7, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAABH0AAACsCAYAAADmO9AtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAUdElEQVR4nO3df7BtZXkf8O9TrlKiSdVAU4OklzJqJqgxchMhExxjmmKCA2hNQ4zjjzJVk+pMfyStJtNysLVTFaVFGyoGEq0gWhvxjijI+CNx2sHkosgPCQ1EDdyAje0oohZEnv6x1+09Xs6595yz97n77HU+n5kzd++137XWs971nHXWfu777l3dHQAAAADG5a/NOwAAAAAAZk/RBwAAAGCEFH0AAAAARkjRBwAAAGCEFH0AAAAARkjRBwAAAGCEFH0AAAAARkjRBwAAAGCEFH0AAAAARkjRBwAAAGCERlP0qarvVtUNy352VtWzq+rrByz/u6u0f+2w/Piq+kxV3V5V76uqRw7Ln1VVn62qB6vqhcv2+7eH5TdU1S1V9ar59MDszatPh9d+pKo+VlW3VtUXqmrn4T7+zVBVXVXvWfZ8R1X9VVV9eHj+suH58n78saHvv33A8pcM65xUVTcN/XthVdWw/JeGnHyoqnatEMuPVNV9VfUbh+v4N9tG+3d47cSq+kRV3VZVf1ZV/2pZX9bQt7dX1Y1V9Yxl+7i6qr62bx/Llv9+VX1x2X6efnh6AQAAYGLHvAOYoW939/e8qRoKBZ/u7uetpf3gjUku6O4rquo/JzknyUVJ/iLJy5Ic+Ab57iSndPf9VfXoJDdX1e7u/supjmZrmFefJsm7k7yhu68d+vWhDR/F1vLNJE+pqqO6+9tJfj7J3gPavK+7X718wdDvd6zSvxcl+UdJPpPkI0mem+SjSW5O8oIk71gllrcO7cZko/17VJLdSX6tuz9WVd+X5L8l+fUk/ynJLyR54vDzzEz6/JnD6m9O8n1JXrlCPL/Z3R+YyZEBAACs02hG+szC8L/6z0my703au5KclSTd/aXuvjEHFB+6+4Huvn94emT06ffYSJ8OIy92dPe1Q7v7uvtbhy/qTfeRJKcPj38lyXs3uqGqenySH+ju67q7MymW7evfW7v7tlXWOyvJF5PcstF9b2Eb6d8XJfnv3f2xJBny7dVJXju8fmaSd/fEdUkeM/R9uvvjSb4xw/gBAABmYkwFiqOWTaP44LLlpx4wleOEFdrfUFW/nOQHk3ytux8c2tyV5NhD7biqjquqG5PcmeSNIxnlk8yvT5+U5GtV9QdV9bmqenNVHTHbQ5urK5KcXVV/PcnTMhmhs9wvH9CPRw3LTzhg+amZ9OVdy9
Y9ZP8OI6f+ZZLzZnI0W89G+vfEJNcvb9TddyR5dFX9QCZ9eueyl9d0bUjyhmE62AVVdeRGDwgAAGAjNmV618nnXrOU5NwZbvK86847bekQbVabWrTmqUhVdfRGguvuO5M8rap+OMmVVfWB7v7KRra1mjOuPH0pM+7T3WddtXSINvPq0x1JTk3yE5lMAXtfJtPALtnAtlb1jjPfs5QZ9+krP/TipUM16u4bh+lav5LJqJQDrTT9KFlhetdKn9WzBkuZTLe7b9jupth77HFLmXH/Hrv3zqVDNZqif2ftdUnuSfLIJBdnUmh7/WbsCAAAYCVjGukzC/87k2kb+4phT8jDPw9kVcMIn5szKVgwsZE+vSvJDd3958MIoSuTPOMQ6yya3UnOzxRTuwZ7M+nTfdbSv89M8qaq+lKSf5Lkt6rq1QdfZeGst3+/kOSk5Quq6u8kua+7782kT49b9vIh+7m77x6mg92f5PeS/NQaYwEAAJgJRZ9lhs9E+WSSfd8k9dIkHzrYOlX1hH3Tb6rqsUl+JsmKn6OyHW2kT5P8SSaFomOG58/J5E35mFya5LzuvmmajXT33UnuraqTh89PekkO0b/dfWp37+zunUn+Q5J/191vnyaOLWi9/XtZkp+p/d9Ed1SSC5O8aXh9d5KXDN/idXKSrw99v6p9n/kznJezMikIAwAAHDY1eU+++Krqvu5+9AHLnp3JG+AvLlv8b7v7A1X13STL3xBe3d2vHf53/4okj0vyuSQvHr6Z6yeTfDDJY5P83yT3dPeJVfXzSd6SpJNUkrd398Wbc5SH17z6dNjPvn6tTD5r5RXd/cBmHOfhdJA+/Y3ufl5VvSyTb4NaPork15P8ZZJb870FxUu7+8JhitfvJzkqk2/jek13d1U9P8nbkhyT5GuZjJ467YB9L2UymuX8mR3kHG20f7v7f1TVUzPpr8cnOSLJf0ny+qEvK8nbM/lmtG8leXl37xm2/+kkP5rk0ZmMbDunu6+pqk9k0veV5IYkr+ru+zbp0AEAAB5mNEUfAAAAAPYzvQsAAABghBR9AAAAAEZI0QcAAABghBR9AAAAAEZI0QcAAABghHbMYiNHH31079y5cxabAgAAACDJ9ddf/9XuPmaj68+k6LNz587s2bNnFpsCAAAAIElVfXma9U3vAgAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEZpJ0edb/+fbs9gMfI/z/+tb5h0C63T5rZclSfa89/NTbWfa9beCe9/y1nmHsKKtFNdGzvM8c2Nffq/3NRbLNOdy37rv/OTtswqHTTbLc+U6AOO21X7Ht1o8ydaMiRkVfb6p6MMm+KNHfGLeIbBOV9x2eZLk+itummo7066/FXzjrRfMO4QVbaW4NnKe55kb+/J7va+xWKY5l/vWveRTd8wqHDbZLM+V6wCM21b7Hd9q8SRbMyZM7wIAAAAYJUUfAAAAgBHaMasNvePM98xqUzDx8uSMK0+fdxRskGtCsvfY4+Ydwpa3aHnimrQ9zOI8n3zuNTOIhEXjGgEcTq45rIWRPgAAAAAjpOgDAAAAMEIzm971yg+9eFabgiTJVVe+N7vPumreYbAOy4eYTnNNWLQpP6s5du+d8w7hYbbalLP15sm8c2O1a5Lh1eOy0b89y/PguvNOm1U4bKJZT8Nz3wLjtRX/1m+1a85W7COM9AEAAAAYJUUfAAAAgBFS9AEAAAAYoZkUfR71uKNmsRn4Hs/6znPmHQLrdPaTX5QkOensp061nWnX3wq+/5/903mHsKKtFNdGzvM8c2Nffq/3NRbLNOdy37rnPPuEWYXDJpvluXIdgHHbar/jWy2eZG
vGRFLdPfVGdu3a1Xv27JlBOAAAAAAkSVVd3927Nrq+6V0AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4zcvmtlx2WdWbhwP2uN453fvL2dS2fxrTb3IyYtjP9OZ3N7j/nh+1K7o/Havcka7lXufctb13XvtbbHtbjUNcl163N64Pt2Lfb8ZhZO0WfGbnitssPyzqzcOB+1xvHJZ+6Y13LpzHtNjcjpu1Mf05ns/vP+WG7kvvjsdo9yVruVb7x1gvWta/1tof1ONR1yXVr8/pgO/btdjxm1k7RBwAAAGCEFH0AAAAARmjHvAMYkzOuPH3eIazZtLGefO41M4pka+2LQ3M+tjbnB1h009yj7D32uBlGAtPxN/nQ9NHs6EtWY6QPAAAAwAgp+gAAAACMkOldM7T7rKvW1X6e08GWx7qROK4777SHLdusIYUr7WutDHOcvWnOx3Z3OPLR+WE7cq0fl5Xup9Z6r3Ls3jvXvB9TwdhsB/ub7Lo1sRn3Ldu1b90Djle9frr1jfQBAAAAGCFFHwAAAIARUvQBAAAAGKEjlpaWpt7IxRdfvPSKV7xi+mgWWHfy1GOetunrzMKB+11vHJ3kpOMft+bl05h2m5sR03amP6ez2f3n/LBdyf3xWO2eZK33Kkf+9Cnr2t9628NaHeq65Lq1eX2wHft2Ox7zdnLeeefdvbS0dPFG16/unjqIXbt29Z49e6beDgAAAAATVXV9d+/a6PqmdwEAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+hwG7/zk7etaDsB+l9962UGfAwAAK1P0OQwu+dQd61oOwH5X3Hb5QZ8DAAArU/QBAAAAGCFFHwAAAIARUvQBAAAAGKEd8w5guzj53GvmHQLAwjrjytPnHQIAACwcI30AAAAARkjRBwAAAGCETO86TK4777SHLTPlC2Btdp911f9/bKoXAACsjZE+AAAAACOk6AMAAAAwQoo+h8E5zz5hXcsB2O/sJ7/ooM8BAICVVXdPvZFdu3b1nj17ZhAOAAAAAElSVdd3966Nrm+kDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjFB19/QbqfpGktumDwe2tKOTfHXeQcAmk+dsB/Kc7UCesx3Ic7aDJ3f392905R0zCuK27t41o23BllRVe+Q5YyfP2Q7kOduBPGc7kOdsB1W1Z5r1Te8CAAAAGCFFHwAAAIARmlXR5+IZbQe2MnnOdiDP2Q7kOduBPGc7kOdsB1Pl+Uw+yBkAAACArcX0LgAAAIARWlPRp6oeU1UfqKo/rapbq+qUqnpcVV1bVX82/PvYoW1V1YVVdXtV3VhVz9jcQ4DZWCXPf6mqbqmqh6pq1wHtXzfk+W1Vddq84ob1WCXP3zw8v7GqPlhVj1nWXp6zcFbJ838z5PgNVfWxqvrhoa37FhbSSnm+7LV/XlVdVUcPz+U5C2mV6/lSVe0druc3VNUvLmvvvoWFs9r1vKpeMyy7paretKz9uvJ8rSN9/mOSq7v7R5P8eJJbk7w2yce7+4lJPj48T5JfSPLE4ecVSS5a4z5g3lbK85uTvCDJHy1vWFU/luTsJCcmeW6S36mqIw5vuLAhK+X5tUme0t1PS/I/k7wukecstJXy/M
3d/bTufnqSDyf510Nb9y0sqpXyPFV1XJK/l+QvlrWV5yyqFfM8yQXd/fTh5yOJ+xYW2sPyvKp+NsmZSX68u09Mcn6ysTw/ZNGnqv5GkmcluSRJuvuB7v7aEMC7hmbvSnLW8PjMJO/uieuSPKaqHr+OA4bDbrU87+5bu/u2FVY5M8kV3X1/d38xye1JfurwRQzrd5A8/1h3Pzg0uy7JE4bH8pyFc5A8v3dZs0cl2fehhu5bWDgHuT9PkguS/Ivsz/FEnrOADpHnK3HfwsI5SJ7/WpJ/3933D8v/17DKuvN8LSN9jk/yV0l+r6o+V1W/W1WPSvJD3X330OaeJD80PD42yZ3L1r9rWAZb2Wp5vhp5ziJaS57/wyQfHR7LcxbRqnleVW+oqjuT/Gr2j/SR5yyiFfO8qs5Msre7P39Ae3nOIjrYfcurh6mKl9bwMSOR5yym1fL8SUlOrarPVNUfVtVPDu3XnedrKfrsSPKMJBd1908k+Wb2T+VKkvTkK8B8DRiL7JB5DiNw0Dyvqt9O8mCSy+YTHszEqnne3b/d3cdlkuOvnl+IMLWV8nwpyW9lf0ETFt1q1/OLkpyQ5OlJ7k7ylrlFCNNbLc93JHlckpOT/GaS91dVbWQHayn63JXkru7+zPD8A0NQX9k3LHT4d99wo71Jjlu2/hOGZbCVrZbnq5HnLKJV87yqXpbkeUl+dSjkJ/KcxbSW6/llSf7+8Fies4hWy/Pjk3y+qr6USS5/tqr+VuQ5i2nFPO/ur3T3d7v7oSTvzP6pLfKcRbTa9fyuJH8wTMv94yQPJTk6G8jzQxZ9uvueJHdW1ZOHRT+X5AtJdid56bDspUk+NDzeneQlw7cEnJzk68umgcGWdJA8X83uJGdX1ZFVdXwmH4z4x5scJkxltTyvqudm8vkPZ3T3t5atIs9ZOAfJ8ycua3Zmkj8dHrtvYeGskuef7e6/2d07u3tnJm8YnjG0lecsnINcz5d/HtXzM/nilcR9CwvoIO9Dr0zys0lSVU9K8sgkX80G8nzHGmN5TZLLquqRSf48ycszKRi9v6rOSfLlJP9gaPuRJL+YyQcKfWtoC4vgYXleVc9P8rYkxyS5qqpu6O7TuvuWqnp/Jr+QDyb5x9393blFDmu30vX8T5IcmeTaYdTodd39KnnOAlspz393uKF6KJP7llcNbd23sKhWyvPVyHMW1Up5fmFVPT2Tjxf5UpJXJon7FhbYSnn+zSSXVtXNSR5I8tJhNP6687z2j+IHAAAAYCzW8pk+AAAAACwYRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AYGFV1Q9W1Q3Dzz1VtXd4fF9V/c684wMAmCdf2Q4AjEJVLSW5r7vPn3csAABbgZE+AMDoVNWzq+rDw+OlqnpXVX26qr5cVS+oqjdV1U1VdXVVPWJod1JV/WFVXV9V11TV4+d7FAAA01H0AQC2gxOSPCfJGUnek+ST3f3UJN9OcvpQ+Hlbkhd290lJLk3yhnkFCwAwCzvmHQAAwGHw0e7+TlXdlOSIJFcPy29KsjPJk5M8Jcm1VZWhzd1ziBMAYGYUfQCA7eD+JOnuh6rqO73/Qw0fyuR+qJLc0t2nzCtAAIBZM70LACC5LckxVXVKklTVI6rqxDnHBAAwFUUfAGDb6+4HkrwwyRur6vNJbkjy0/ONCgBgOr6yHQAAAGCEjPQBAAAAGCFFHwAAAIARUvQBAAAAGCFFHwAAAIARUvQBAAAAGCFFHwAAAIARUvQBAAAAGCFFHwAAAIAR+n9eCFmA+OnY+QAAAABJRU5ErkJggg==\n" - }, - "metadata": {}, - "execution_count": 7 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TpqTxB12F9Do" - }, - "source": [ - "This nice visualization is 
brought to you by [`pyannote.core`](http://pyannote.github.io/pyannote-core/) and basically indicates when each speaker speaks. " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "62flXtNIsA9q", - "outputId": "7fe6edc9-194b-45c5-d3fb-973bf0553282", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 119 - } - }, - "source": [ - "from pyannote.audio import Audio \n", - "from IPython.display import Audio as IPythonAudio\n", - "waveform, sr = Audio().crop(DEMO_FILE, EXCERPT)\n", - "IPythonAudio(waveform.flatten(), rate=sr)" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.7/dist-packages/resampy/interpn.py:114: NumbaWarning: \u001b[1m\u001b[1mThe TBB threading layer requires TBB version 2019.5 or later i.e., TBB_INTERFACE_VERSION >= 11005. Found TBB_INTERFACE_VERSION = 9107. The TBB threading layer is disabled.\u001b[0m\u001b[0m\n", - " _resample_loop_p(x, t_out, interp_win, interp_delta, num_table, scale, y)\n" - ] }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - " \n", - " " - ] - }, - "metadata": {}, - "execution_count": 8 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xD7QfHCHEIgE" - }, - "source": [ - "# Processing your own audio file (optional)\n", - "\n", - "In case you just want to go ahead with the demo file, skip this section entirely.\n", - "\n", - "In case you want to try processing your own audio file, proceed with running this section. It will offer you to upload an audio file (preferably a `wav` file but all formats supported by [`SoundFile`](https://pysoundfile.readthedocs.io/en/latest/) should work just fine)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CKsR0OZqLj1d" - }, - "source": [ - "## Upload audio file" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "HQRCi0C7EHor", - "outputId": "79a008b1-6ae9-48bd-b57a-22af28d1341b", - "colab": { - "resources": { - "http://localhost:8080/nbextensions/google.colab/files.js": { - "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3
IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbG
VtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZT
ogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgZG8gewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwoKICAgICAgbGV0IHBlcmNlbnREb25lID0gZmlsZURhdGEuYnl0ZUxlbmd0aCA9PT0gMCA/CiAgICAgICAgICAxMDAgOgogICAgICAgICAgTWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCk7CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPSBgJHtwZXJjZW50RG9uZX0lIGRvbmVgOwoKICAgIH0gd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCk7CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK", - "ok": true, - "headers": [ - [ - "content-type", - "application/javascript" - ] - ], - "status": 200, - "status_text": "OK" - } - }, - "base_uri": "https://localhost:8080/", - "height": 121 - } - }, - "source": [ - "import google.colab\n", - "own_file, _ = google.colab.files.upload().popitem()\n", - "OWN_FILE = {'audio': own_file}\n", - "notebook.reset()\n", - "\n", - "# load audio waveform and play it\n", - "waveform, sample_rate = Audio()(OWN_FILE)\n", - "IPythonAudio(data=waveform.squeeze(), rate=sample_rate, autoplay=True)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": 
"display_data", - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " Upload widget is only available when the cell has been executed in the\n", - " current browser session. Please rerun this cell to enable.\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {} + "e3169ca885e04536a709d5751173ce9a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_358c3a67f8b54c4c899e095611fa116b", + "placeholder": "​", + "style": "IPY_MODEL_e1c9df12fa034c93a9b3530ea4a7c5aa", + "value": " 318/318 [00:00<00:00, 11.0kB/s]" + } + }, + "e4c1e9affaba4045a3ec903091b6f454": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": 
null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Saving sample.wav to sample.wav\n" - ] + "e7728d9c55e44274966f8f6dbc445c54": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "output_type": "execute_result", - "data": { - "text/html": [ - "\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 7 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dLDSdzCuI0ED" - }, - "source": [ - "Simply replace `DEMO_FILE` by `OWN_FILE` in the rest of the notebook.\n", - "\n", - "Note, however, that unless you provide a groundtruth annotation in the next cell, you will (obviously) not be able to visualize groundtruth annotation nor evaluate the performance of the diarization pipeline quantitatively" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "numGt3msL39D" - }, - "source": [ - "## Upload groundtruth (optional)\n", - "\n", - "The groundtruth file is expected to use the RTTM format, with one line per speech turn with the following convention:\n", - "\n", - "```\n", - "SPEAKER {file_name} 1 {start_time} {duration} {speaker_name} \n", - "```" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "f5u8wRm3GYFr", - "colab": { - "resources": { - "http://localhost:8080/nbextensions/google.colab/files.js": { - "data": 
"Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24
gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmV
zb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgZG8gewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwo
gICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwoKICAgICAgbGV0IHBlcmNlbnREb25lID0gZmlsZURhdGEuYnl0ZUxlbmd0aCA9PT0gMCA/CiAgICAgICAgICAxMDAgOgogICAgICAgICAgTWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCk7CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPSBgJHtwZXJjZW50RG9uZX0lIGRvbmVgOwoKICAgIH0gd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCk7CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK", - "ok": true, - "headers": [ - [ - "content-type", - "application/javascript" - ] - ], - "status": 200, - "status_text": "OK" - } - }, - "base_uri": "https://localhost:8080/", - "height": 193 + "e928540e99564d808cb2d12c92daa498": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c9974003727a401797953ef2885db5a2", + "placeholder": "​", + "style": "IPY_MODEL_77a361d1ff214e8799891bbeb28a0789", + "value": "Downloading: 100%" + } }, - "outputId": "a0c2b7a7-177d-430c-fc7e-d93d168aaf90" - }, - "source": [ - "groundtruth_rttm, _ = 
google.colab.files.upload().popitem()\n", - "groundtruths = load_rttm(groundtruth_rttm)\n", - "if OWN_FILE['audio'] in groundtruths:\n", - " groundtruth = groundtruths[OWN_FILE['audio']]\n", - "else:\n", - " _, groundtruth = groundtruths.popitem()\n", - "groundtruth" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " Upload widget is only available when the cell has been executed in the\n", - " current browser session. Please rerun this cell to enable.\n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {} + "e98cf7a63c814ffd94f69928f0700ebf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aba21021d3bb4565a58ffa40049810db", + "placeholder": "​", + "style": "IPY_MODEL_f7812fa7fbf744c1b261b985d085e28e", + "value": "Downloading: 100%" + } + }, + "ea95ffd922c0455d957120f034e541f8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "eae11f84c2644ada8295b445c924baec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2cbf0faadd4842c8b22e10541ff9de4e", + "placeholder": "​", + "style": "IPY_MODEL_ab32c7daa1d9404fb921f39fbc4fc05c", + "value": "Downloading: 100%" + } + }, + "ebc9801e164a44b3b6f8dc7f590e1c79": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Saving sample.rttm to sample.rttm\n" - ] + "ecd8e5e364d34ea8bfbba4fbd467384d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_554e567a83b348f88092c6ba01830930", + "placeholder": "​", + "style": "IPY_MODEL_6e334cad2e94462cae6e722bd6f11a9e", + "value": "Downloading: 100%" + } }, - { - "output_type": "execute_result", - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABHQAAACsCAYAAAAaLvvnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOHUlEQVR4nO3de6ykZ10H8O+v3YIGCghbG1yqC+WiBWwpa9OKJk2DbQUVURRISCDyhxowXNQEFOzWqEnBtl4AjQVCDYSLgFpBqA1ZBJWCp1As5aJtbFPWUkStbVHLpT//mJdwaLuX2Z1zZp6zn08yOe95b/ObeeeZ951vnmemujsAAAAAjOOoZRcAAAAAwHwEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoTKrqeVX1msPY/uSq+khVXVNVf1VVD1i37OVVdV1Vfa6qzllMxVvbRh2PqnpIVe2pqjsOZ/8AAACwTAKdBaiqo5O8PsnLuvvxSf48ya9Oy05K8qwkj01ybpLXTeuzQfZ3PJL8X5JXJvmVJZUHAAAAh22oQKeq7ldV762qT1bVp6rqmVV1Q1W9auqJ8bGqeuS07nFV9a6q+sfp9qRp/mlTz41PVNU/VNVj7uV+njqts72qzp6mP15Vf1ZV95/WuaGqLqiqjyf5mSSPTvKhaRdXJPnpafppSd7W3Xd2978muS7JaRv6RG2SEY9Hd3+5u/8us2AHAAAAhjRUoJNZD5d/6+6Tu/txSd4/zf/vqSfGa5L83jTv95Nc3N0/kNmH+ddP8z+b5Ie7+wlJfiPJ76y/g6p6epKXJXnKNOsVSZ7c3acmWUvy0nWr/0d3n9rdb0tybWbhTTILFE6YpnckuWndNp+f5m0FIx4PAAAAGN62w9l4744Tdic5bzGlJEnO37H3pt37WX5Nkgur6oIk7+nuD1dVkrx1Wv7WJBdP009OctK0PEkeMPXmeGCSS6vqUUk6yTHr9n9Wkl1Jzu7u26rqx5KclOTvp/3cJ8lH1q3/9nXTP5fkD6rqlUkuS/KVg37UC3L6eZfvzoKPx5Xnn7N7P8sdDwAAAFiCwwp0Nlt3/3NVnZpZb43fqqoPfGPR+tWmv0clOb27v2VozfRFuHu6++lVtTPJB9ctvj7JIzIbrrOWpJJc0d3P3kdJX15X22eTnD3dx6OTPHVatDff2jvkYdO84Q16PAAAAGB4Qw25qqrvSvI/3f3mJK9Ocuq06Jnr/n6jx8bfJPmlddueMk0+MN8MVJ53t7u4MbPhQH9aVY9NcmWSJ637Hpj7TeHAvdX2ndPfozIbFvTH06LLkjyrqu5bVQ9P8qgkH5vjYa+sQY8HAAAADK+6+8BrrYjpJ79fneSuJF9N8otJ3pnZUJsfTXJnkmd393VVtT3Ja5N8X2Y9kT7U3b9QVWckuTSz3hzvTfKc7t5ZVc9Lsqu7X1hVT0jyliQ/nuR7klyQ5L5TGa/o7suq6oZp/S9Ntb0oyQumdd6d5OU9PblV9euZDQH6WpIXd/f7NuQJ2mQDH48bkjwgsyFbt2Y2pOvTG/AUAQAAwIYYKtC5N3f/IM9yOR4AAACw8YYacgUAAADAFuihAwAAAHCk0UMHAAAAYDACHQAAAIDBCHQAAAAABrNtnpW3b9/eO3fu3KBSAAAAAI48V1111Ze6+7h5tpkr0Nm5c2fW1tbmqwoAAACAfaqqG+fdxpArAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAYjEAHAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwQh0AAAAAAYj0AEAAAAYjEAHAAAAYDACHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAAB
gMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAwAh0AAACAwcwV6Hz9llsWeue3XXjRQveXJJfsuW7h+1yEVa1rFW3F52orPibG53XJKtnf63EjrhfgUB3q69F7LnCk8z64eHMFOnctONC5/aKLF7q/JHnDB69f+D4XYVXrWkVb8bnaio+J8Xldskr293rciOsFOFSH+nr0ngsc6bwPLp4hVwAAAACDEegAAAAADGbbvBvs3XHCRtSxUKefd/myS+AwOYawObQ1RjHC9QcciPdcABZJDx0AAACAwQh0AAAAAAYz95CrHXtvWtidb1T36SvPP2dD9ns4dLGdzyoew8Ph+LOqtlpbY1wHep9c5PUHHI7DuX71ngscyXwmWjw9dAAAAAAGI9ABAAAAGIxABwAAAGAwcwU6Rx1//ELv/NiXvmSh+0uS55954sL3uQirWtcq2orP1VZ8TIzP65JVsr/X40ZcL8ChOtTXo/dc4EjnfXDxqrsPeuVdu3b12traBpYDAAAAcGSpqqu6e9c82xhyBQAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEehsgtsuvGjZJQxvs55DxwruSbtg1Vyy57oh9gkAq8Z13dYi0NkEt1908bJLGN5mPYeOFdyTdsGqecMHrx9inwCwalzXbS0CHQAAAIDBCHQAAAAABiPQAQAAABjMtmUXcKTYu+OEZZfAQXKsAFbf6eddvuwSAGBIPu9sHXroAAAAAAxGoAMAAAAwGEOuNsmOvTctu4ShbWa3QMcKvpVuuayiK88/Z6H7M4QLgCOFzzsrqmruTfTQAQAAABiMQAcAAABgMAKdTXDsS1+y7BKGt1nPoWMF96RdsGqef+aJQ+wTAFaN67qtpbr7oFfetWtXr62tbWA5AAAAAEeWqrqqu3fNs40eOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAAAAAgxHoAAAAAAxGoAMAAAAwGIEOAAAAwGAEOgAAAACDEegAAAAADEagAwAAADAYgQ4AAADAYAQ6AAAAAIOp7j74lav+PcmNG1cO3KvtSb607CKAQ6L9wpi0XRiTtgvjekx3HzvPBtvmWbm7j5uvHjh8VbXW3buWXQcwP+0XxqTtwpi0XRhXVa3Nu40hVwAAAACDEegAAAAADEagwwj+ZNkFAIdM+4UxabswJm0XxjV3+53rS5EBAAAAWD49dAAAAAAGI9BhpVTVG6vqi1X1qXXzHlxVV1TVv0x/v2OZNQL3tI+2u7uq9lbV1dPtKcusEbinqjqhqvZU1aer6tqqetE037kXVtx+2q/zL6ywqvq2qvpYVX1yarvnT/MfXlUfrarrqurtVXWfA+1LoMOqeVOSc+8272VJPtDdj0rygel/YLW8Kfdsu0lycXefMt3+epNrAg7sa0l+ubtPSnJ6khdU1Ulx7oUR7Kv9Js6/sMruTHJWd5+c5JQk51bV6UkuyKztPjLJfyV5/oF2JNBhpXT3h5L8591mPy3JpdP0pUl+clOLAg5oH20XWHHdfXN3f3yavj3JZ5LsiHMvrLz9tF9ghfXMHdO/x0y3TnJWkndO8w/q3CvQYQTHd/fN0/QXkhy/zGKAubywqv5pGpJlyAassKrameQJST4a514Yyt3ab+L8Cyutqo6uqqu
TfDHJFUmuT3Jrd39tWuXzOYiAVqDDUHr2s2x+mg3G8EdJTsysK+nNSS5cbjnAvlTV/ZO8K8mLu/u29cuce2G13Uv7df6FFdfdX+/uU5I8LMlpSb73UPYj0GEEt1TVQ5Nk+vvFJdcDHITuvmU6Wd2V5JLMTlbAiqmqYzL7MPiW7n73NNu5FwZwb+3X+RfG0d23JtmT5IwkD6qqbdOihyXZe6DtBTqM4LIkz52mn5vkL5dYC3CQvvFhcPL0JJ/a17rAclRVJXlDks9090XrFjn3worbV/t1/oXVVlXHVdWDpulvT/IjmX0H1p4kz5hWO6hzb8160cJqqKq3JjkzyfYktyQ5L8lfJHlHku9OcmOSn+1uX74KK2QfbffMzLp7d5Ibkvz8uu/kAFZAVf1Qkg8nuSbJXdPsX8vsezice2GF7af9PjvOv7Cyqur7M/vS46Mz62Tzju7+zap6RJK3JXlwkk8keU5337nffQl0AAAAAMZiyBUAAADAYAQ6AAAAAIMR6AAAAAAMRqADAAAAMBiBDgAAAMBgBDoAwMqrqodU1dXT7QtVtXeavqOqXrfs+gAANpufLQcAhlJVu5Pc0d2/u+xaAACWRQ8dAGBYVXVmVb1nmt5dVZdW1Yer6saq+qmqelVVXVNV76+qY6b1nlhVf1tVV1XV5VX10OU+CgCA+Ql0AICt5MQkZyX5iSRvTrKnux+f5H+TPHUKdf4wyTO6+4lJ3pjkt5dVLADAodq27AIAABbofd391aq6JsnRSd4/zb8myc4kj0nyuCRXVFWmdW5eQp0AAIdFoAMAbCV3Jkl331VVX+1vflngXZld91SSa7v7jGUVCACwCIZcAQBHks8lOa6qzkiSqjqmqh675JoAAOYm0AEAjhjd/ZUkz0hyQVV9MsnVSX5wuVUBAMzPz5YDAAAADEYPHQAAAIDBCHQAAAAABiPQAQAAABiMQAcAAABgMAIdAAAAgMEIdAAAAAAGI9ABAAAAGIxABwAAAGAw/w9yi/xWuRzNKQAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 8 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ByXlUaTfneEB" - }, - "source": [ - "# Speaker diarization with `pyannote.pipeline`\n", - "\n", - "We are about to run a full speaker diarization pipeline, that includes speaker segmentation, speaker embedding, and a final clustering step. 
**Brace yourself!**" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "go1wJBYJnsIx", - "outputId": "2c496b59-b574-4f30-80df-059b5ccb71f7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 273, - "referenced_widgets": [ - "5da54fdb45e1485badd323232321bc96", - "e6f461cb90cc498da90a20f2a6c610e0", - "3e8a884bd6c94a86a324cfa7b9580ea7", - "0b03a220e59848dfafb6411b24c3b37f", - "001a4d8dd8954706802b3add70f34d4b", - "b950f7fe8ea34776bde01ed2f4244f5f", - "f7b175bb312c4f43809c97f98e6e84a3", - "e0c6b8aff2ad4849b3dac392ee388112", - "8ddd76365cfd4408b8ca12309b183967", - "df8dc69ca1cf4e57bfebf4762c0460f0", - "799487c4c6de471c827f120f1c11d9e2", - "9d713ac6d10949fb8d277286d969ad3b", - "e2be4344fe8e4a36be35513041ea74a3", - "2ed58d60a46d4f8491de7ad61d7cc589", - "4ea0432a604e4ac197999debb131c557", - "21cd4080794342dbb3209df737d4f835", - "5cdd23be0a804fd192b6ccaddcb52964", - "889aca71e695487d9cd31c2dae585a64", - "b9fc636fcc6f4a2785999385cd340ab9", - "ebf9284b6e514698b8c7389c758f7520", - "5749dba1be9b4eaaad17928a90ea411c", - "f0f3451d91bf4c5a9a8e20623a7b528d", - "0f8ee54b3031408da0281f7aa98eff25", - "ff89e65846414100b84b1f9cf8b70fd9", - "a950bac54c5742e79cd2de4fda29c2e0", - "3f5459fa25654a76bfec64d1dcb542b6", - "fa10eb9e43fd43a49934cdb4301aefff", - "547cbb166dba40d1b626bd204ff3cf96", - "a2aba4e8152c4fbda91bf59a42f25604", - "69a8212dded54f79adf3f8b60b9eb000", - "25142cfd61084c128fe39adad5016109", - "7e64327fa6e948edae034c8d52bb96b4", - "32895abb57a7426aa089230aa49cfff2", - "747aa6596d1a4b04a6f77e38179776ad", - "e65942656e2347eba0a3fe3ae872159b", - "6e5c84bfa614482b960be701f92ee22c", - "def1205f95274d3db0fafc9c8dd1c913", - "46a04e196f9e461bb43e541270022b8d", - "18a02eecf49746cc972882c2cca9ded2", - "31e65957ea204c1084119e3e39528832", - "aa6ccf1390cc4139b7ba23ea7129b273", - "264cde3fae9d442abdb74b4d45288be3", - "2c28a61cfa474c7498a8381e74639530", - "5895a4011e3143a1931566822a65fd50", - "43ada2ceb8ad44d7b8c1a3a1d0aefe2d", - "42b71601aeef4271ac150288a78bf66a", - 
"300b503e606147bb8bc9ff7a98be0bac", - "dcce36fe0b0946aaa484ae1f5f7eb6da", - "5beb3def5ae64f05b3ceb26c611dcb79", - "030181ab29c149dfaf4940ea2c713190", - "cdec3493e32e49a1abc5f3c6783a49d9", - "b0efbf56235f4f9f866137c85ea2e189", - "12bee4747bc74073bf4cbfd36f0445cb", - "cb0fc23346fa4b638c3cce496ac6cd70", - "0200715daa5e4f4e8f7839d2ea579f19", - "892182c8c97d426d9acf5467d1354d32", - "32acc8ab3c5643d7a4c3595fd67da269", - "3c3189d90a404112ac7f6081acab7e61", - "34e0fb65b69d470099506bd24362c52c", - "c697909d569d40918acd835108e29ae7", - "d690ed1451e74799a2c4c265cd562ed9", - "f89679f445424ec1ba2203fb8f7753de", - "735b5f02606d4c0288f70469a8ab2902", - "0308a7a44b8b4a9cae7bda21fbad99f5", - "967a5451e89049b4ac4688338495b50e", - "bb655ffda271431892bba5ebab9fd93b", - "f7626a7dae2a421ba31a578746be491d", - "97d03beea7cb4e479e1ee67edfa5f883", - "00751b0ca24b47c1b7d337a5b17fc6c1", - "9fc668b6710c449e898d4b8401fe974c", - "d0ea1fb45fe24bc3b549acc4e05860cd", - "c15d7b4b314b4fb1beb5e8b001114392", - "6f73694403ae46538b7280ab31657ebc", - "f33d2ce982144ced87a7a834fe83bdb8", - "7693042eb27e4994b86ed1afb293be56", - "fd6ccdba0a2746419e744035e1096bf2", - "2dc0b625a4ff4a60abc4fa3c6671c9ab", - "0552df02d084406da5b7c51e18fd2cb0", - "f61c385e5fa542a98652a434ad4cd324", - "923af70854ad4651bd2d2a8556ab59f6", - "170805a3d7ec457aaacd4d9a0acd09d0", - "14713e02e5734e46bfb9f9f815699e39", - "bea9fb28848d4c3da10df4fec3dfdb6b", - "a296598d7d3040e8b8b4dab4f7819e91", - "ab878626da504834b827f1762bcd2c20", - "f5a6de62a96340f4ab3094f08e8d54ef", - "af1e4e3e52304c51a46eab4b989a3f0b", - "b0b8bb8fe0bc47908273136304cf4870" - ] - } - }, - "source": [ - "from pyannote.audio import Pipeline\n", - "pipeline = Pipeline.from_pretrained('pyannote/speaker-diarization')\n", - "diarization = pipeline(DEMO_FILE)" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading: 0%| | 0.00/598 [00:00" - ], - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABH0AAACtCAYAAAAtZwOIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAARwUlEQVR4nO3de5BkZ1kH4N8rAcsCFGMiRqBcKiIEokZYQaxCjZSCqMRLFBAFUfFSgNfyisLgpUoFpRQBFQyiRkhIeaGQW0AIliWXjSzZhBCNGkxiUKPlBaSCIa9/9NnKONszO7vTsz39zfNUbaX79He+/rrP22e+/uWc09XdAQAAAGAsn7DsAQAAAACweEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABjRM6FNVz66qa6rqqqo6XFWPqKq3V9V1VfW+qvrLqnrg1Pbo8sPTv8s29HW4ql69YdnvVtWF0+3Tq+q9VfW0qjpQVR9d19fhqnrK1O6GqjoyjemKqvqs47yGx07jur6qfmLd8mdOy7qqzljUe7ZXDL7tLp6WX11VF1XVXRf1vu0Fg2+735lew1VVdVlV3WNR7xsAAMCpcNqyB7AIVfXIJF+T5KHdfdsUjNxtevjJ3X2oqr47yfOTPH798jl9nZPkLkkeVVV37+6PbHj8U5K8Kclvd/crqupAkr/r7vM2Gd753X1rVT0vyU8nefomr+EuSV6c5CuS3JTkPVX12u5+f5K/TPK6JG/fxtuxUvbBtrs4ybdOTf8wyXcleenW78pq2Afb7oe6+7+mdr+a5JlJfvH47wwAAMDeMMqRPmclubW7b0uS7r61u/9pQ5t3JPnsbfT1pCS/n+TNSS7Y8Ng9krwhyR9294l+cf+rJPfZ4vGHJ7m+u/++uz+W5NVHn7+739vdN5zg862K0bfd63uS5N1J7nuCz72Xjb7tjgY+leSTkvQJPjcAAMBS7cqRPq96wjlrSZ67wC6f96RLrl3b4vE3J3lOVf1NkrckuaS7r9jQ5muTHFl3/+Kq+uh0+/Lu/tHp9hMy+7/+D0ryrMyOzjjqV5O8vLtfuKHvs6vq8Lr7z+ruv9jQ5rFJ/mSL13CfJDeuu39Tkkds0X4XHFzLgrdbcmjtOG32xbabTuv6tiQ/sEU/J2+t1rLobbfWa8dpM/y2q6pXJHlckvcn+ZEt+gEAANhzhji9q7s/XFUPS/KoJOcnuWTdtTmOfsm8IbMvk0cdc5pJVR3M7MiFf6yqm5NcVFWnd/e/T03+PMkFVfWC7v6XdatudZrJ26rq9CQfTvIzO3mdI9pH2+4lSd4xJ5RYWfth23X306ZTwF6UWTD1ipPtCwAA4FQb5fSudPfHu/vt3f3czK698Y3TQ0/u7vO6++u6+8Ytukhmp5g8qKpuSPJ3ST55XT/J7NSP30zy+qq65zaHdn6Sz0pyOMnztmh3c5L7rbt/32nZ8EbfdlX13CRnJvnhbT7vyhh92yWz1ziNYf2YAAAA9rxdOdJnOhVrbTf6nmf6daA7uvtvp0XnJflgknNPoI9PSPLNST736HVJqur8zI4SeNnRdt39wqr6jCR/VFVfvZ2+u/v2qvrBJEeq6ufXHcGw3nuSPKCq7p/Zl84nJvmW7Y5/MQ6t5RRut2T8bVdV35XkMUke3d13bPc1nbDZqVhru9b/HCNvu+k6Pmd39/XT7ccn+cB2XxcAAMBeMMqRPvdI8sqqen9VXZXkwTn+F+CL1/3U81syO0Xl5g0Xon1HkgdX1VnrV+zuH8/s2h+/n9l7eHb9/5+O/v6NT9bdtyR5VZJnzBtMd9+e2ZESb0pybZJLu/uaJKmq76+qmzI7CuGqqnr5cV7bKhl622V2hMq9k/zV1P9zjvPaVsnI266m13Yks2sSnZXkZ4/z2gAAAPaUmv2oEAAAAAAjGeVIHwA
AAADWGeLXu1ZJVX1akrfOeejR3f1vp3o8bJ9tt7psOwAAYD9yehcAAADAgJzeBQAAADAgoQ8AAADAgBZyTZ8zzjijDxw4sIiuAAAAAEhy5ZVX3trdZ57s+gsJfQ4cOJBDhw4toisAAAAAklTVB3eyvtO7AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABrSY0Oe//2kh3bCC3ra2Sx3/1i71y+LYRuzQvP3Hru1TFmGzmvdZGMfOt+WR13z7zoeR5MhrfuOYZW9Ze8rc5Zy8xb2f9gOw/yzzc79X9zl7dVz724JCn1sW0g0r6Irn7VLHL9ulflkc24gdmrf/2LV9yiJsVvM+C+PY+ba8+rJ3LWAcydWXvfiYZf967XvmLufkLe79tB+A/WeZn/u9us/Zq+Pa35zeBQAAADAgoQ8AAADAgE5bWE9rtbCuYObgsgcA7LaV+9thvzS+nW/jVz3hnAWMY3n9c7LsH4BTyT6H7XGkDwAAAMCAhD4AAAAAA1rg6V29sK5YIbt6asahXeybnXNIKQuw8W/Hnj/da95+yWdhLDv923NOnnTJtTsexVancC2if2YWe6qceQvsL8v++78X9znLfk+Yx5E+AAAAAAMS+gAAAAAMSOgDAAAAMKDFXNPnnmctpBtW0Jc+d5c6fvou9cvi2Ebs0Lz9x67tUxZhs5r3WRjHzrfluRc+YgHjSM698BnHLDvznC/MvR/y8IX0z8y89/nk2A/A/rPMz/1e3efs1XHtb9W98wswHzx4sA8d2osXkgIAAABYTVV1ZXef9FWynd4FAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+uzYby15fTh1jrzmN5Y9BIBj2DeNYuOcaDtzpHltzK0Adpu/vatD6LNjL1vy+nDqXH3Zi5c9BIBj2DeNYuOcaDtzpHltzK0Adpu/vatD6AMAAAAwIKEPAAAAwICEPgAAAAADOm3ZAxjDwWUPAE6ZVz3hnGUPAYBhncycyjwMYBl8L1gNjvQBAAAAGJDQBwAAAGBATu9aiEM7WNchyayWJ11y7bKHAPD/OLx8JOvnVNudI22ch5lbAZwKvhecGt9yae1ofUf6AAAAAAxI6AMAAAAwIKHPjj19yevDqXPuhc9Y9hAAjmHfNIqNc6LtzJHmtTG3Atht/vaujuruHXdy8ODBPnRoJ9e1AQAAAGC9qrqyu0/6gnWO9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABhQdffOO6n67yTX7Xw4sKedkeTWZQ8Cdpk6Zz9Q5+wH6pz9QJ2zHzywu+95siuftqBBXNfdBxfUF+xJVXVInTM6dc5
+oM7ZD9Q5+4E6Zz+oqkM7Wd/pXQAAAAADEvoAAAAADGhRoc9vL6gf2MvUOfuBOmc/UOfsB+qc/UCdsx/sqM4XciFnAAAAAPYWp3cBAAAADGhboU9V3auqLquqD1TVtVX1yKo6vaour6q/nf77qVPbqqpfr6rrq+qqqnro7r4EWIxN6vybquqaqrqjqg5uaP+TU51fV1WPWda44URsUufPn+5fVVV/XFX3WtdenbNyNqnzn5tq/HBVvbmqPnNqa97CSppX5+se+5Gq6qo6Y7qvzllJm+zP16rq5ml/friqHreuvXkLK2ez/XlVPWtadk1V/fK69idU59s90ufXkryxux+U5POTXJvkJ5K8tbsfkOSt0/0k+aokD5j+fXeSl27zOWDZ5tX51Um+Ick71jesqgcneWKShyR5bJKXVNVdTu1w4aTMq/PLk5zb3Z+X5G+S/GSizllp8+r8+d39ed19XpLXJXnO1Na8hVU1r85TVfdL8pVJ/nFdW3XOqppb50le2N3nTf9en5i3sNKOqfOqOj/JBUk+v7sfkuQFycnV+XFDn6r6lCRfkuR3kqS7P9bd/zEN4JVTs1cm+brp9gVJfq9n3pnkXlV11gm8YDjlNqvz7r62u6+bs8oFSV7d3bd19z8kuT7Jw0/diOHEbVHnb+7u26dm70xy3+m2OmflbFHn/7Wu2d2THL2ooXkLK2eL+XmSvDDJj+XOGk/UOSvoOHU+j3kLK2eLOv++JL/Y3bdNy/9lWuWE63w7R/rcP8m/JnlFVb23ql5eVXdPcu/uvmVq86Ek955u3yfJjevWv2laBnvZZnW+GXXOKtpOnX9HkjdMt9U5q2jTOq+qX6iqG5M8OXce6aPOWUVz67yqLkhyc3e/b0N7dc4q2mre8szpVMWLarrMSNQ5q2mzOv+cJI+qqndV1RVV9YVT+xOu8+2EPqcleWiSl3b3FyT5SO48lStJ0rOfAPMzYKyy49Y5DGDLOq+qZye5PcnFyxkeLMSmdd7dz+7u+2VW489c3hBhx+bV+VqSn8qdgSasus325y9NcnaS85LckuRXljZC2LnN6vy0JKcn+aIkP5rk0qqqk3mC7YQ+NyW5qbvfNd2/bBrUPx89LHT679HDjW5Ocr916993WgZ72WZ1vhl1ziratM6r6tuTfE2SJ09BfqLOWU3b2Z9fnOQbp9vqnFW0WZ3fP8n7quqGzGr5r6vqM6LOWU1z67y7/7m7P97ddyR5We48tUWds4o225/flOSPptNy353kjiRn5CTq/LihT3d/KMmNVfXAadGjk7w/yWuTPHVa9tQkfzrdfm2Sp0y/EvBFSf5z3WlgsCdtUeebeW2SJ1bVJ1bV/TO7MOK7d3mYsCOb1XlVPTaz6z88vrv/Z90q6pyVs0WdP2BdswuSfGC6bd7Cytmkzv+6uz+9uw9094HMvjA8dGqrzlk5W+zP11+P6usz++GVxLyFFbTF99A/SXJ+klTV5yS5W5JbcxJ1fto2x/KsJBdX1d2S/H2Sp2UWGF1aVd+Z5INJvnlq+/okj8vsgkL/M7WFVXBMnVfV1yd5UZIzk/xZVR3u7sd09zVVdWlmH8jbkzyjuz++tJHD9s3bn78nyScmuXw6avSd3f296pwVNq/OXz5NqO7IbN7yvVNb8xZW1bw634w6Z1XNq/Nfr6rzMru8yA1JvidJzFtYYfPq/CNJLqqqq5N8LMlTp6PxT7jO686j+AEAAAAYxXau6QMAAADAihH6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAACurqj6tqg5P/z5UVTdPtz9cVS9Z9vgAAJbJT7YDAEOoqrUkH+7uFyx7LAAAe4EjfQCA4VTVl1XV66bba1X1yqr6i6r6YFV9Q1X9clUdqao3VtVdp3YPq6orqurKqnpTVZ213FcBALAzQh8AYD84O8mXJ3l8kj9I8rbu/twkH03y1VPw86IkF3b3w5JclOQXljVYAIBFOG3ZAwAAOAXe0N3
/W1VHktwlyRun5UeSHEjywCTnJrm8qjK1uWUJ4wQAWBihDwCwH9yWJN19R1X9b995UcM7MpsPVZJruvuRyxogAMCiOb0LACC5LsmZVfXIJKmqu1bVQ5Y8JgCAHRH6AAD7Xnd/LMmFSX6pqt6X5HCSL17uqAAAdsZPtgMAAAAMyJE+AAAAAAMS+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwID+D+A6OlzYDTy0AAAAAElFTkSuQmCC\n" - }, - "metadata": {}, - "execution_count": 10 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "smAd5ofBqWWB" - }, - "source": [ - "# Evaluation with `pyannote.metrics`\n", - "\n", - "Because groundtruth is available, we can evaluate the quality of the diarization pipeline by computing the [diarization error rate](http://pyannote.github.io/pyannote-metrics/reference.html#diarization)." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "z0Ore3eGpNyy" - }, - "source": [ - "from pyannote.metrics.diarization import DiarizationErrorRate\n", - "metric = DiarizationErrorRate()\n", - "der = metric(groundtruth, diarization)" - ], - "execution_count": 11, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "e0F8IUaqYFU8", - "outputId": "4de50c4e-640a-46d2-d412-c7242cccb263", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "source": [ - "print(f'diarization error rate = {100 * der:.1f}%')" - ], - "execution_count": 12, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "diarization error rate = 18.4%\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2w0Xp_ElrYTa" - }, - "source": [ - "This implementation of diarization error rate is brought to you by [`pyannote.metrics`](http://pyannote.github.io/pyannote-metrics/).\n", - "\n", - "It can also be used to improve visualization by find the optimal one-to-one mapping between groundtruth and hypothesized speakers." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "WPF5JG3Ppt9K", - "outputId": "7963cc1e-b26a-4fde-cdb0-d72057975dd4", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 117 - } - }, - "source": [ - "mapping = metric.optimal_mapping(groundtruth, diarization)\n", - "diarization.rename_labels(mapping=mapping)" - ], - "execution_count": 13, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAABH0AAACsCAYAAADmO9AtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAARnUlEQVR4nO3de5BtV10n8O9PrmAGHxCTURT0xhTEIoghXJ1gGSviaNBQSWBQo1LADDWCCpZVvlCr5MYZpoaHwwxYRkTiYBEMVEpCildIWfj4J+ANueRBzBgETGJAcQadkJgY8vOP3rfS9O1zb6f7dJ8+qz+fqq57zr5r715n9e/sXv2tvfap7g4AAAAAY/myRXcAAAAAgPkT+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADGiY0KeqvlhVh1d97a+qc6rqH9ds//cz2r9i2n5KVX24qm6rqndU1SOn7d9TVR+tqgeq6nmrvu83T9sPV9XNVfXSxYzA/C1qTKf/+6aq+mBV3VJVH6+q/Tv9+rdDVXVVvW3V831V9fdV9Z7p+Yum56vH8cnT2N+7ZvsLpn2eXlU3TuP7hqqqafsPTzX5YFUdWKcv31RVd1fVL+zU6wcAAGDn7Ft0B+bo3u4+Y/WGKSj48+5+9kbaT16d5PXdfXlV/U6SFye5JMnfJHlRkrV/IN+V5BndfV9VfWWSm6rqqu7+2y29mt1hUWOaJH+Q5FXdfc00rg9u+lXsLl9I8pSqOqG7703y/UnuXNPmHd39stUbpnH/xIzxvSTJf07y4STvS/KsJO9PclOS5yZ504y+/I+pHQAAAAMa5kqfeZiukHhmkiumTW9NcmGSdPenuvuGrAkfuvv+7r5vevqoGNMvsZkxraonJ9nX3ddM7e7u7nt2rtfb7n1Jzpse/1iSP9zsgarqcUm+uruv7e7OSlh2ZHxv6e5bZ+x3YZJPJrl5s98bAACA3W2kgOKEVcte3rVq+9lrlsScuk77w1X1o0m+Nsnnu/uBqc0dSb7xeN+4qp5QVTckuT3Jqwe5yidZ3Jg+Kcnnq+qPqur6qnptVT1ivi9toS5PclFVfUWSp2blCp3VfnTNOJ4wbT91zfazszKWd6za97jjO1059ctJLp7LqwEAAGBX2pblXWe98uqDSV45x0NefO3F5x48TptZS4s2vBSpqk7aTOe6+/YkT62qb0hyZVVd0d2f3cyxZjn/yvMOZs5jetWF7z14nDaLGtN9Sc5O8rSsLAF7R1aWgb1lE8ea6U0XvO1g5jymL3n38w8er1F33zAt1/qxrFz1s9Z6y7uSdZZ3rXevng04mJXldndPxwUAAGBAI13pMw//kOQxVXUkDHt8jr7fykzTFT43ZSWwYMVmxvSOJIe7+6+nK4SuTHLmNvZxEa5K8rpsYWnX5M6sjOkRGxnff5fkNVX1qSQ/l+RXq+plx94FAACAZSP0
WWW6J8qHkhz5JKkXJnn3sfapqscfWX5TVY9N8t1J1r2Pyl60mTFN8hdZCYpOnp4/M8nHt6eHC3Npkou7+8atHKS770ryT1V11nT/pBfkOOPb3Wd39/7u3p/kfyb5b939W1vpBwAAALtPrfxNvvyq6u7u/so1287Jyh/An1y1+b929xVV9cUkq//g/kB3v6KqviUr91w5Mcn1SZ4/fTLXdyR5V5LHJvnnJJ/p7tOr6vuT/GaSTlJJfqu7f3d7XuXOWtSYTt/nyLhWkuuS/GR3378dr3MnHWNMf6G7n11VL0ry2nzp1To/neRvk9ySLw0UL+3uN0xLvP53khOy8mlcL+/urqrnJHljkpOTfD4rV0+du+Z7H0xyd3e/bm4vEgAAgF1hmNAHAAAAgIdY3gUAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADCgffM4yEknndT79++fx6EAAAAASHLdddd9rrtP3uz+cwl99u/fn0OHDs3jUAAAAAAkqapPb2V/y7sAAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGNJfQ557/e+88DsMSOvSHH9uW4779lsu25bjMj58RW7Xe+WO7zinzMKvmvRfGMY+f5c++811z6Eny5g/ddtS2n7r0I+tuZ/PmNZ7OA7D3LPJ9v1vPObu1X3vdXEKfLwh99qzrLr9xW457+a1v35bjMj9+RmzVeueP7TqnzMOsmvdeGMc8fpYfufnfzKEnyVv+5BNHbbv+0/9v3e1s3rzG03kA9p5Fvu936zlnt/Zrr7O8CwAAAGBAQh8AAACAAe2b14HedMHb5nUoSJKcf+V5i+4CsM2W7XeH89L4tv4z/tmc9cqr59KXWbb7+GyO8wOwk5xz2ChX+gAAAAAMSOgDAAAAMKC5Le96ybufP69DsUS2c2nGVRe+d9uOzda5pJR5WPu7Y7cv91rvvOS9MJat/u456/qrc+3F5265H8dawjWP47NinkvlzFtgb1n07//deM5Z9JiwPlf6AAAAAAxI6AMAAAAwIKEPAAAAwIDmck+fR594wjwOwxJ6+kXfti3Hvei0H9+W4zI/fkZs1Xrnj+06p8zDrJr3XhjHPH6W33n6PXPoSfLic049atvTvvmxOfOUE+dyfFasN86b4TwAe88i3/e79ZyzW/u111V3b/kgBw4c6EOHDs2hOwAAAAAkSVVd190HNru/5V0AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+gAAAAAMSOgDAAAAMCChzxa9/ZbLFro/7KQ3f+i2RXcB4CjOTWNYOyfayBxpvTbmVgDbz+/e5SH02aLLb337QveHnfSWP/nEorsAcBTnpjGsnRNtZI60XhtzK4Dt53fv8hD6AAAAAAxI6AMAAAAwIKEPAAAAwID2LboDIzj/yvMW3QXYMWe98upFdwGAQW1mTmUeBrAY/i5YDq70AQAAABiQ0AcAAABgQJZ3zcFVF7530/u6JJllc+3F5y66CwBfwuXl41g9p9roHGntPMzcCmBn+LtgZ9RvbG1/V/oAAAAADEjoAwAAADAgoc8WXXTajy90f9hJLz7n1EV3AeAozk1j
WDsn2sgcab025lYA28/v3uVR3b3lgxw4cKAPHTo0h+4AAAAAkCRVdV13H9js/q70AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGFB199YPUvX/k9y69e7ArnZSks8tuhOwzdQ5e4E6Zy9Q5+wF6py94LTu/qrN7rxvTp24tbsPzOlYsCtV1SF1zujUOXuBOmcvUOfsBeqcvaCqDm1lf8u7AAAAAAYk9AEAAAAY0LxCn9+d03FgN1Pn7AXqnL1AnbMXqHP2AnXOXrClOp/LjZwBAAAA2F0s7wIAAAAY0IZCn6p6TFVdUVV/WVW3VNUzqurEqrqmqv5q+vexU9uqqjdU1W1VdUNVnbm9LwHmY0ad/3BV3VxVD1bVgTXtf2Wq81ur6txF9Rsejhl1/trp+Q1V9a6qesyq9uqcpTOjzv/LVOOHq+qDVfUNU1vzFpbSenW+6v9+vqq6qk6anqtzltKM8/nBqrpzOp8frqofWtXevIWlM+t8XlUvn7bdXFWvWdX+YdX5Rq/0+V9JPtDd35rk25PckuQVSf64u5+Y5I+n50nyg0meOH39ZJJLNvg9YNHWq/Obkjw3yZ+tblhVT05yUZLTkzwryW9X1SN2truwKevV+TVJntLdT03yf5L8SqLOWWrr1flru/up3X1Gkvck+fWprXkLy2q9Ok9VPSHJDyT5m1Vt1TnLat06T/L67j5j+npfYt7CUjuqzqvqe5NckOTbu/v0JK9LNlfnxw19quprknxPkrckSXff392fnzrw1qnZW5NcOD2+IMkf9Iprkzymqh73MF4w7LhZdd7dt3T3revsckGSy7v7vu7+ZJLbknznzvUYHr5j1PkHu/uBqdm1SR4/PVbnLJ1j1Pk/rWr26CRHbmpo3sLSOcb8PElen+SX8lCNJ+qcJXScOl+PeQtL5xh1/lNJ/nt33zdt/7tpl4dd5xu50ueUJH+f5Per6vqq+r2qenSSr+vuu6Y2n0nyddPjb0xy+6r975i2wW42q85nUecso43U+X9K8v7psTpnGc2s86p6VVXdnuQn8tCVPuqcZbRunVfVBUnu7O6PrWmvzllGx5q3vGxaqnhpTbcZiTpnOc2q8yclObuqPlxVf1pV3zG1f9h1vpHQZ1+SM5Nc0t1PS/KFPLSUK0nSKx8B5mPAWGbHrXMYwDHrvKp+LckDSS5bTPdgLmbWeXf/Wnc/ISs1/rLFdRG2bL06P5jkV/NQoAnLbtb5/JIkpyY5I8ldSX5zYT2ErZtV5/uSnJjkrCS/mOSdVVWb+QYbCX3uSHJHd394en7F1KnPHrksdPr3yOVGdyZ5wqr9Hz9tg91sVp3Pos5ZRjPrvKpelOTZSX5iCvITdc5y2sj5/LIk/2F6rM5ZRrPq/JQkH6uqT2Wllj9aVV8fdc5yWrfOu/uz3f3F7n4wyZvz0NIWdc4ymnU+vyPJH03Lcj+S5MEkJ2UTdX7c0Ke7P5Pk9qo6bdr0fUk+nuSqJC+ctr0wybunx1clecH0KQFnJfnHVcvAYFc6Rp3PclWSi6rqUVV1SlZujPiRbe4mbMmsOq+qZ2Xl/g/nd/c9q3ZR5yydY9T5E1c1uyDJX06PzVtYOjPq/KPd/W+7e39378/KHwxnTm3VOUvnGOfz1fejek5WPnglMW9hCR3j79Ark3xvklTVk5I8Msnnsok637fBvrw8yWVV9cgkf53kP2YlMHpnVb04yaeT/MjU9n1JfigrNxS6Z2oLy+CoOq+q5yR5Y5KTk7y3qg53
97ndfXNVvTMrb8gHkvxMd39xYT2HjVvvfP4XSR6V5JrpqtFru/ul6pwltl6d/940oXowK/OWl05tzVtYVuvV+SzqnGW1Xp2/oarOyMrtRT6V5CVJYt7CEluvzr+Q5NKquinJ/UleOF2N/7DrvB66ih8AAACAUWzknj4AAAAALBmhDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPALC0quprq+rw9PWZqrpzenx3Vf32ovsHALBIPrIdABhCVR1Mcnd3v27RfQEA2A1c6QMADKeqzqmq90yPD1bVW6vqz6vq01X13Kp6TVXdWFUfqKovn9o9var+tKquq6qrq+pxi30VAABbI/QBAPaCU5M8M8n5Sd6W5EPd/W1J7k1y3hT8vDHJ87r76UkuTfKqRXUWAGAe9i26AwAAO+D93f0vVXVjkkck+cC0/cYk+5OcluQpSa6pqkxt7lpAPwEA5kboAwDsBfclSXc/WFX/0g/d1PDBrMyHKsnN3f2MRXUQAGDeLO8CAEhuTXJyVT0jSarqy6vq9AX3CQBgS4Q+AMCe1933J3lekldX1ceSHE7yXYvtFQDA1vjIdgAAAIABudIHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGNC/Ai8VBrvAw2FYAAAAAElFTkSuQmCC\n" - }, - "metadata": {}, - "execution_count": 13 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "DKk6ePLWp4eB", - "outputId": "e0a18bd2-ebbb-49b1-f2c9-73196177b799", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 117 - } - }, - "source": [ - "groundtruth" - ], - "execution_count": 14, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABH0AAACsCAYAAADmO9AtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAUdElEQVR4nO3df7BtZXkf8O9TrlKiSdVAU4OklzJqJqgxchMhExxjmmKCA2hNQ4zjjzJVk+pMfyStJtNysLVTFaVFGyoGEq0gWhvxjijI+CNx2sHkosgPCQ1EDdyAje0oohZEnv6x1+09Xs6595yz97n77HU+n5kzd++137XWs971nHXWfu777l3dHQAAAADG5a/NOwAAAAAAZk/RBwAAAGCEFH0AAAAARkjRBwAAAGCEFH0AAAAARkjRBwAAAGCEFH0AAAAARkjRBwAAAGCEFH0AAAAARkjRBwAAAGCERlP0qarvVtUNy352VtWzq+rrByz/u6u0f+2w/Piq+kxV3V5V76uqRw7Ln1VVn62qB6vqhcv2+7eH5TdU1S1V9ar59MDszatPh9d+pKo+VlW3VtUXqmrn4T7+zVBVXVXvWfZ8R1X9VVV9eHj+suH58n78saHvv33A8pcM65xUVTcN/XthVdWw/JeGnHyoqnatEMuPVNV9VfUbh+v4N9tG+3d47cSq+kRV3VZVf1ZV/2pZX9bQt7dX1Y1V9Yxl+7i6qr62bx/Llv9+VX1x2X6efnh6AQAAYGLHvAOYoW939/e8qRoKBZ/u7uetpf3gjUku6O4rquo/JzknyUVJ/iLJy5Ic+Ab57iSndPf9VfXoJDdX1e7u/supjmZrmFefJsm7k7yhu68d+vWhDR/F1vLNJE+pqqO6+9tJfj7J3gPavK+7X718wdDvd6zSvxcl+UdJPpPkI0mem+SjSW5O8oIk71gllrcO7cZko/17VJLdSX6tuz9WVd+X5L8l+fUk/ynJLyR54vDzzEz6/JnD6m9O8n1JXrlCPL/Z3R+YyZEBAACs02hG+szC8L/6z0my703au5KclSTd/aXuvjEHFB+6+4Huvn94emT06ffYSJ8OIy92dPe1Q7v7uvtbhy/qTfeRJKcPj38lyXs3uqGqenySH+ju67q7MymW7evfW7v7tlXWOyvJF5PcstF9b2Eb6d8XJfnv3f2xJBny7dVJXju8fmaSd/fEdUkeM/R9uvvjSb4xw/gBAABmYkwFiqOWTaP44LLlpx4wleOEFdrfUFW/nOQHk3ytux8c2tyV5NhD7biqjquqG5PcmeSNIxnlk8yvT5+U5GtV9QdV9bmqenNVHTHbQ5urK5KcXVV/PcnTMhmhs9wvH9CPRw3LTzhg+amZ9OVdy9Y9ZP8OI6f+ZZLzZnI0W89G+vfEJNcvb9TddyR5dFX9QCZ9eueyl9d0bUjyhmE62AVVdeRGDwgAAGAjNmV618nnXrOU5NwZbvK86847bekQbVabWrTmqUhVdfRGguvuO5M8rap+OMmVVfWB7v7KRra1mjOuPH0pM+7T3WddtXSINvPq0x1JTk3yE5lMAXtfJtPALtnAtlb1jjPfs5QZ9+krP/TipUM16u4bh+lav5LJqJQDrTT9KFlhetdKn9WzBkuZTLe7b9jupth77HFLmXH/Hrv3zqVDNZqif2ftdUnuSfLIJBdnUmh7/WbsCAAAYCVjGukzC/87k2kb+4phT8jDPw9kVcMIn5szKVgwsZE+vSvJDd3958MIoSuTPOMQ6yya3UnOzxRTuwZ7M+nTfdbSv89M8qaq+lKSf5Lkt6rq1QdfZeGst3+/kOSk5Quq6u8kua+7782kT49b9vIh+7m77x6mg92f5PeS/NQaYwEAAJgJRZ9lhs9E+WSSfd8k9dIkHzrYOlX1hH3Tb6rqsUl+JsmKn6OyHW2kT5P8SSaFomOG58/J5E35mFya5LzuvmmajXT33UnuraqTh89PekkO0b/dfWp37+zunUn
+Q5J/191vnyaOLWi9/XtZkp+p/d9Ed1SSC5O8aXh9d5KXDN/idXKSrw99v6p9n/kznJezMikIAwAAHDY1eU+++Krqvu5+9AHLnp3JG+AvLlv8b7v7A1X13STL3xBe3d2vHf53/4okj0vyuSQvHr6Z6yeTfDDJY5P83yT3dPeJVfXzSd6SpJNUkrd398Wbc5SH17z6dNjPvn6tTD5r5RXd/cBmHOfhdJA+/Y3ufl5VvSyTb4NaPork15P8ZZJb870FxUu7+8JhitfvJzkqk2/jek13d1U9P8nbkhyT5GuZjJ467YB9L2UymuX8mR3kHG20f7v7f1TVUzPpr8cnOSLJf0ny+qEvK8nbM/lmtG8leXl37xm2/+kkP5rk0ZmMbDunu6+pqk9k0veV5IYkr+ru+zbp0AEAAB5mNEUfAAAAAPYzvQsAAABghBR9AAAAAEZI0QcAAABghBR9AAAAAEZI0QcAAABghHbMYiNHH31079y5cxabAgAAACDJ9ddf/9XuPmaj68+k6LNz587s2bNnFpsCAAAAIElVfXma9U3vAgAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AAACAEZpJ0edb/+fbs9gMfI/z/+tb5h0C63T5rZclSfa89/NTbWfa9beCe9/y1nmHsKKtFNdGzvM8c2Nffq/3NRbLNOdy37rv/OTtswqHTTbLc+U6AOO21X7Ht1o8ydaMiRkVfb6p6MMm+KNHfGLeIbBOV9x2eZLk+itummo7066/FXzjrRfMO4QVbaW4NnKe55kb+/J7va+xWKY5l/vWveRTd8wqHDbZLM+V6wCM21b7Hd9q8SRbMyZM7wIAAAAYJUUfAAAAgBHaMasNvePM98xqUzDx8uSMK0+fdxRskGtCsvfY4+Ydwpa3aHnimrQ9zOI8n3zuNTOIhEXjGgEcTq45rIWRPgAAAAAjpOgDAAAAMEIzm971yg+9eFabgiTJVVe+N7vPumreYbAOy4eYTnNNWLQpP6s5du+d8w7hYbbalLP15sm8c2O1a5Lh1eOy0b89y/PguvNOm1U4bKJZT8Nz3wLjtRX/1m+1a85W7COM9AEAAAAYJUUfAAAAgBFS9AEAAAAYoZkUfR71uKNmsRn4Hs/6znPmHQLrdPaTX5QkOensp061nWnX3wq+/5/903mHsKKtFNdGzvM8c2Nffq/3NRbLNOdy37rnPPuEWYXDJpvluXIdgHHbar/jWy2eZGvGRFLdPfVGdu3a1Xv27JlBOAAAAAAkSVVd3927Nrq+6V0AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4AAAAAI6ToAwAAADBCij4zcvmtlx2WdWbhwP2uN453fvL2dS2fxrTb3IyYtjP9OZ3N7j/nh+1K7o/Havcka7lXufctb13XvtbbHtbjUNcl163N64Pt2Lfb8ZhZO0WfGbnitssPyzqzcOB+1xvHJZ+6Y13LpzHtNjcjpu1Mf05ns/vP+WG7kvvjsdo9yVruVb7x1gvWta/1tof1ONR1yXVr8/pgO/btdjxm1k7RBwAAAGCEFH0AAAAARmjHvAMYkzOuPH3eIazZtLGefO41M4pka+2LQ3M+tjbnB1h009yj7D32uBlGAtPxN/nQ9NHs6EtWY6QPAAAAwAgp+gAAAACMkOldM7T7rKvW1X6e08GWx7qROK4777SHLdusIYUr7WutDHOcvWnOx3Z3OPLR+WE7cq0fl5Xup9Z6r3Ls3jvXvB9TwdhsB/ub7Lo1sRn3Ldu
1b90Djle9frr1jfQBAAAAGCFFHwAAAIARUvQBAAAAGKEjlpaWpt7IxRdfvPSKV7xi+mgWWHfy1GOetunrzMKB+11vHJ3kpOMft+bl05h2m5sR03amP6ez2f3n/LBdyf3xWO2eZK33Kkf+9Cnr2t9628NaHeq65Lq1eX2wHft2Ox7zdnLeeefdvbS0dPFG16/unjqIXbt29Z49e6beDgAAAAATVXV9d+/a6PqmdwEAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+gAAAACMkKIPAAAAwAgp+hwG7/zk7etaDsB+l9962UGfAwAAK1P0OQwu+dQd61oOwH5X3Hb5QZ8DAAArU/QBAAAAGCFFHwAAAIARUvQBAAAAGKEd8w5guzj53GvmHQLAwjrjytPnHQIAACwcI30AAAAARkjRBwAAAGCETO86TK4777SHLTPlC2Btdp911f9/bKoXAACsjZE+AAAAACOk6AMAAAAwQoo+h8E5zz5hXcsB2O/sJ7/ooM8BAICVVXdPvZFdu3b1nj17ZhAOAAAAAElSVdd3966Nrm+kDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjJCiDwAAAMAIKfoAAAAAjFB19/QbqfpGktumDwe2tKOTfHXeQcAmk+dsB/Kc7UCesx3Ic7aDJ3f392905R0zCuK27t41o23BllRVe+Q5YyfP2Q7kOduBPGc7kOdsB1W1Z5r1Te8CAAAAGCFFHwAAAIARmlXR5+IZbQe2MnnOdiDP2Q7kOduBPGc7kOdsB1Pl+Uw+yBkAAACArcX0LgAAAIARWlPRp6oeU1UfqKo/rapbq+qUqnpcVV1bVX82/PvYoW1V1YVVdXtV3VhVz9jcQ4DZWCXPf6mqbqmqh6pq1wHtXzfk+W1Vddq84ob1WCXP3zw8v7GqPlhVj1nWXp6zcFbJ838z5PgNVfWxqvrhoa37FhbSSnm+7LV/XlVdVUcPz+U5C2mV6/lSVe0druc3VNUvLmvvvoWFs9r1vKpeMyy7paretKz9uvJ8rSN9/mOSq7v7R5P8eJJbk7w2yce7+4lJPj48T5JfSPLE4ecVSS5a4z5g3lbK85uTvCDJHy1vWFU/luTsJCcmeW6S36mqIw5vuLAhK+X5tUme0t1PS/I/k7wukecstJXy/M3d/bTufnqSDyf510Nb9y0sqpXyPFV1XJK/l+QvlrWV5yyqFfM8yQXd/fTh5yOJ+xYW2sPyvKp+NsmZSX68u09Mcn6ysTw/ZNGnqv5GkmcluSRJuvuB7v7aEMC7hmbvSnLW8PjMJO/uieuSPKaqHr+OA4bDbrU87+5bu/u2FVY5M8kV3X1/d38xye1JfurwRQzrd5A8/1h3Pzg0uy7JE4bH8pyFc5A8v3dZs0cl2fehhu5bWDgHuT9PkguS/Ivsz/FEnrOADpHnK3HfwsI5SJ7/WpJ/3933D8v/17DKuvN8LSN9jk/yV0l+r6o+V1W/W1WPSvJD3X330OaeJD80PD42yZ3L1r9rWAZb2Wp5vhp5ziJaS57/wyQfHR7LcxbRqnleVW+oqjuT/Gr2j/SR5yyiFfO8qs5Msre7P39Ae3nOIjrYfcurh6mKl9bwMSOR5yym1fL8SUlOrarPVNUfVtVPDu3XnedrKfrsSPKMJBd1908k+Wb2T+VKkvTkK8B8DRiL7JB5DiNw0Dyvqt9O8mCSy+YTHszEqnne3b/d3cdlkuOvnl+IMLWV8nwpyW9lf0ETFt1q1/OLkpyQ5OlJ7k7ylrlFCNNbLc93JHl
ckpOT/GaS91dVbWQHayn63JXkru7+zPD8A0NQX9k3LHT4d99wo71Jjlu2/hOGZbCVrZbnq5HnLKJV87yqXpbkeUl+dSjkJ/KcxbSW6/llSf7+8Fies4hWy/Pjk3y+qr6USS5/tqr+VuQ5i2nFPO/ur3T3d7v7oSTvzP6pLfKcRbTa9fyuJH8wTMv94yQPJTk6G8jzQxZ9uvueJHdW1ZOHRT+X5AtJdid56bDspUk+NDzeneQlw7cEnJzk68umgcGWdJA8X83uJGdX1ZFVdXwmH4z4x5scJkxltTyvqudm8vkPZ3T3t5atIs9ZOAfJ8ycua3Zmkj8dHrtvYeGskuef7e6/2d07u3tnJm8YnjG0lecsnINcz5d/HtXzM/nilcR9CwvoIO9Dr0zys0lSVU9K8sgkX80G8nzHGmN5TZLLquqRSf48ycszKRi9v6rOSfLlJP9gaPuRJL+YyQcKfWtoC4vgYXleVc9P8rYkxyS5qqpu6O7TuvuWqnp/Jr+QDyb5x9393blFDmu30vX8T5IcmeTaYdTodd39KnnOAlspz393uKF6KJP7llcNbd23sKhWyvPVyHMW1Up5fmFVPT2Tjxf5UpJXJon7FhbYSnn+zSSXVtXNSR5I8tJhNP6687z2j+IHAAAAYCzW8pk+AAAAACwYRR8AAACAEVL0AQAAABghRR8AAACAEVL0AQAAABghRR8AYGFV1Q9W1Q3Dzz1VtXd4fF9V/c684wMAmCdf2Q4AjEJVLSW5r7vPn3csAABbgZE+AMDoVNWzq+rDw+OlqnpXVX26qr5cVS+oqjdV1U1VdXVVPWJod1JV/WFVXV9V11TV4+d7FAAA01H0AQC2gxOSPCfJGUnek+ST3f3UJN9OcvpQ+Hlbkhd290lJLk3yhnkFCwAwCzvmHQAAwGHw0e7+TlXdlOSIJFcPy29KsjPJk5M8Jcm1VZWhzd1ziBMAYGYUfQCA7eD+JOnuh6rqO73/Qw0fyuR+qJLc0t2nzCtAAIBZM70LACC5LckxVXVKklTVI6rqxDnHBAAwFUUfAGDb6+4HkrwwyRur6vNJbkjy0/ONCgBgOr6yHQAAAGCEjPQBAAAAGCFFHwAAAIARUvQBAAAAGCFFHwAAAIARUvQBAAAAGCFFHwAAAIARUvQBAAAAGCFFHwAAAIAR+n9eCFmA+OnY+QAAAABJRU5ErkJggg==\n" - }, - "metadata": {}, - "execution_count": 14 + "fd47487fc8734594823f8afa00c4239d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fee75343289f42fb8d6dfb4bf26fe368": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_16c0017f65b649f5ac5bebf1c955a1fd", + "placeholder": "​", + "style": "IPY_MODEL_5e2c207db5424f91829bf5c52040a9f2", + "value": " 1.92k/1.92k [00:00<00:00, 48.3kB/s]" + } } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "q4Rjo1aasVS1" - }, - "source": [ - "# Going further \n", - "\n", - "We have only scratched the surface in this introduction. \n", - "\n", - "More details can be found in the [`pyannote.audio` Github repository](https://github.com/pyannote/pyannote-audio).\n" - ] - }, - { - "cell_type": "code", - "source": [ - "" - ], - "metadata": { - "id": "wFK33Y6Dfkw3" - }, - "execution_count": null, - "outputs": [] + } } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/tutorials/overlapped_speech_detection.ipynb b/tutorials/overlapped_speech_detection.ipynb index 78c6372cb..1ad5d4090 100644 --- a/tutorials/overlapped_speech_detection.ipynb +++ b/tutorials/overlapped_speech_detection.ipynb @@ -20,6 +20,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -39,6 +40,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -49,6 +51,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -84,6 +87,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -103,6 +107,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -110,6 +115,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -130,6 +136,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -147,6 +154,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": 
[ @@ -161,10 +169,11 @@ "source": [ "import pytorch_lightning as pl\n", "trainer = pl.Trainer(max_epochs=10)\n", - "trainer.fit(model, osd)" + "trainer.fit(model)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -185,6 +194,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -212,6 +222,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -219,6 +230,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -242,6 +254,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -258,6 +271,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -265,6 +279,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -297,6 +312,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/tutorials/prodigy.md b/tutorials/prodigy.md deleted file mode 100644 index 4efd05f48..000000000 --- a/tutorials/prodigy.md +++ /dev/null @@ -1,151 +0,0 @@ -# Annotating your own data with 💥 Prodigy - -Manually segmenting and labeling audio data is time consuming. For speaker diarization, depending on the required level of precision, it may take more than 10 times the duration of a recording to annotate it. - -## Table of content - -* [Recipes](#recipes) -* [Keyboard shortcuts](#keyboard-shortcuts) - -## Recipes - -`pyannote.audio` comes with a bunch of [💥 Prodigy](https://prodi.gy) recipes designed to speed things up a bit. 
- -| Recipe | Usage | -|-----------------------|-------------------------------------------------------| - 🦻 `pyannote.audio` | Annotate with a [pretrained pipeline](https://huggingface.co/models?other=pyannote-audio-pipeline) in the loop - 🧐 `pyannote.review` | Merge multiple annotations - 🤲 `pyannote.diff` | Show differences between two annotations - 🗄 `pyannote.database` | Dump annotations as [`pyannote.database`](https://github.com/pyannote/pyannote-database/) protocols - -### 🦻 `pyannote.audio` | Annotate with a pretrained pipeline in the loop - -```bash -prodigy pyannote.audio dataset /path/to/audio/directory pyannote/speaker-segmentation -``` - -![pyannote.audio screenshot](./assets/prodigy-pyannote.audio.png) - -`pyannote.audio` recipe will stream in `.wav` files in chunks and apply [a pretrained pipeline](https://huggingface.co/models?other=pyannote-audio-pipeline). You can then adjust the regions manually if needed. - - -
-More options - -``` -prodigy pyannote.audio [options] dataset source pipeline - - dataset Prodigy dataset to save annotations to. - source Path to directory containing audio files to annotate. - pipeline Name of pretrained pipeline on huggingface.co (e.g. - pyannote/speaker-segmentation) or path to local YAML file. - -chunk DURATION Split audio files into shorter chunks of that many seconds. - Defaults to 10s. - -precision STEP Temporal precision of keyboard controls, in milliseconds. - Defaults to 200ms. - -beep Produce a beep when the player reaches the end of a region. -``` - -
- - -### 🧐 `pyannote.review` | Merge multiple annotations - -```bash -prodigy pyannote.review dataset /path/to/audio/directory input1.rttm,input2.rttm -``` - -![pyannote.review screenshot](./assets/pyannote.review.PNG) - -`pyannote.review` recipe take as many annotation files, using the RTTM file format, as you want and let you compare and choose which ones are best within the same stream as `pyannote.audio` recipe. -Click on a segment of the annotation files to add it to the ouput audio, or on "Input X" to add all segments at once. - -
-More options - -``` -prodigy pyannote.review [options] dataset source annotations - - dataset Prodigy dataset to save annotations to. - source Path to directory containing audio files whose annotation is to be checked. - annotations Comma-separated paths to annotation files. - -chunk DURATION Split audio files into shorter chunks of that many seconds. - Defaults to 30s. - -diarization Make an optimal one-to-one mapping between the first annotation and the others. - -precision STEP Temporal precision of keyboard controls, in milliseconds. - Defaults to 200ms. - -beep Produce a beep when the player reaches the end of a region. -``` - -
- - -### 🤲 `pyannote.diff` | Show differences between two annotations - -```bash -prodigy pyannote.diff dataset /path/to/audio/directory /path/to/reference.rttm /path/to/hypothesis.rttm -``` - -![pyannote.diff screenshot](./assets/pyannote.diff.PNG) - -`pyannote.diff` recipe takes one reference file and one hypothesis file, using the RTTM file format, and focuses on where there are the most errors among missed detections, false alarms and confusions. -You can filter on one or more error types and their minimum duration with the corresponding options. - - -
-More options - -``` -prodigy pyannote.diff [options] dataset source reference hypothesis - - dataset Prodigy dataset to save annotations to. - source Path to directory containing audio files whose annotation is to be checked. - reference Path to reference file. - hypothesis Path to hypothesis file. - -chunk DURATION Split audio files into shorter chunks of that many seconds. - Defaults to 30s. - -min-duration DURATION Minimum duration of errors in ms. - Defaults to 200ms. - -diarization Make an optimal one-to-one mapping between reference and hypothesis. - -false-alarm Display false alarm errors. - -speaker-confusion Display confusion errors. - -missed-detection Display missed detection errors. -``` - -
- -### 🗄 `pyannote.database` | Dump annotations as `pyannote.database` protocols - -Work in progress - - -## Keyboard shortcuts - -Though `pyannote.audio` recipes are built on top of the Prodigy [audio interface](https://prodi.gy/docs/api-interfaces#audio), they provide a bunch of handy additional keyboard shortcuts. - -| Shortcut | Description | -|-----------------------------------|--------------------------------------------------| -| `left` / `right` (+ `w`) | Shift player cursor (speed up) | -| `up` / `down` | Switch active region | -| `shift + left` / `shift + right` | Shift active region start time | -| `ctrl + left` / `ctrl + right` | Shift active region end time | -| `shift + up` | Create a new region | -| `shift + down` / `backspace` | Remove active region | -| `spacebar` | Play/pause player | -| `escape` | Ignore this sample | -| `enter` | Validate annotation | - - -## RTTM file format - -RTTM files contain one line per speech turn, using the following convention: - -```bash -SPEAKER {uri} 1 {start_time} {duration} {speaker_id} -``` -* uri: file identifier (as given by pyannote.database protocols) -* start_time: speech turn start time in seconds -* duration: speech turn duration in seconds -* confidence: confidence score (can be anything, not used for now) -* gender: speaker gender (can be anything, not used for now) -* speaker_id: speaker identifier diff --git a/tutorials/speaker_verification.ipynb b/tutorials/speaker_verification.ipynb index 84382e6bd..0014bed93 100644 --- a/tutorials/speaker_verification.ipynb +++ b/tutorials/speaker_verification.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -15,7 +16,7 @@ "\n", "from pyannote.audio import Audio\n", "from pyannote.core import Segment\n", - "audio = Audio(sample_rate=16000, mono=True)\n", + "audio = Audio(sample_rate=16000, mono=\"downmix\")\n", "\n", "# extract embedding for a speaker speaking between t=3s and t=6s\n", "speaker1 = 
Segment(3., 6.)\n", diff --git a/tutorials/training_a_model.ipynb b/tutorials/training_a_model.ipynb index 871ab1d1c..f3e019f8b 100644 --- a/tutorials/training_a_model.ipynb +++ b/tutorials/training_a_model.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -101,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -180,7 +180,7 @@ ], "source": [ "import pytorch_lightning as pl\n", - "trainer = pl.Trainer(gpus=1, max_epochs=1)\n", + "trainer = pl.Trainer(devices=1, accelerator=\"gpu\", max_epochs=1)\n", "trainer.fit(vad_model)" ] }, @@ -202,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -220,17 +220,17 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ - "" + "" ] }, - "execution_count": 18, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -252,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -262,7 +262,7 @@ ", , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ])>" ] }, - "execution_count": 19, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -280,17 +280,32 @@ 
"\n", "Speaker diarization is the task of partitioning a given audio stream of recording into according to the speaker identity.\n", "\n", - "[`pyannote/segmentation`](https://hf.co/pyannote/segmentation) is a model that was pretrained to perform speaker diarization, but only locally, on 5s-long audio chunks. " + "[`pyannote/segmentation`](https://hf.co/pyannote/segmentation) is a model that was pretrained to perform speaker diarization, but only locally, on 5s-long audio chunks. \n", + "\n", + "To load the speaker segmentation model, \n", + "\n", + "* accept the user conditions on [hf.co/pyannote/segmentation](https://hf.co/pyannote/segmentation).\n", + "* login using `notebook_login` below" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from huggingface_hub import notebook_login\n", + "notebook_login()" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pyannote.audio import Model\n", - "pretrained = Model.from_pretrained(\"pyannote/segmentation\")" + "pretrained = Model.from_pretrained(\"pyannote/segmentation\", use_auth_token=True)" ] }, { @@ -302,17 +317,17 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ - "" + "" ] }, - "execution_count": 23, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -331,17 +346,17 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ - "" + "" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -359,12 +374,12 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from pyannote.audio.tasks import Segmentation\n", - "seg_task = 
Segmentation(ami, duration=5.0, max_num_speakers=4)" + "from pyannote.audio.tasks import SpeakerDiarization\n", + "seg_task = SpeakerDiarization(ami, duration=5.0, max_num_speakers=4)" ] }, { @@ -376,7 +391,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -409,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -434,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -452,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -530,7 +545,7 @@ } ], "source": [ - "trainer = pl.Trainer(gpus=1, max_epochs=1)\n", + "trainer = pl.Trainer(devices=1, accelerator=\"gpu\", max_epochs=1)\n", "trainer.fit(finetuned)" ] }, @@ -543,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -575,38 +590,39 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ - "" + "" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "Inference('pyannote/segmentation', step=2.5)(test_file)" + "\n", + "Inference('pyannote/segmentation', use_auth_token=True, step=2.5)(test_file)" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABH0AAACsCAYAAADmO9AtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQvklEQVR4nO3de5Ald1k38O8jiVeCqIkxLquLKQSDQiArF68RL6HEIpQikNL3BUypUfECoihWmY2IgkIoxLK8YYmKCgoqoiREBcTyzau7uMmaxGi4GdaAQCwDEjUhj3+cTjFZ9zJn5syeOb/5fKpObXef7t5npp/67cx3+9enujsAAAAAjOXjll0AAAAAAIsn9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGNBQoU9VPaGquqoeNK3vmdZ/cs0+p1fVHVX189P6vqp69jHO99GqOrjmtaeqzq+q10/vP62q3n/EPudM7z21qv5pej11zTkvqqpDVXVtVV1RVadP28+tqqunc+yvqkds3Xfq5DvZ12bN33ltVd0wfc+fsOa9T6+qq6brc1VVfdq0/VOr6o+r6pqquq6qnr7mmBdW1d9Prycv/JsEAAAACzRU6JPkoiR/Nf15t3cmedya9W9Oct06z3d7d5+75vWuo+zzqiP2ub6qPj3JpUkemeQRSS6tqk+rqlOSvDTJV3X3Q5Jcm+QZ03l+Jsll3X1ukh+f1kdyUq9NVT00yYuSXNjdX5Dk8UleVFUPmXb5kSR/3t0PSPLn03qSfE+S67v7oUnOT/Liqvr4qnpckocnOTez6/rsqrrPOmsFAACAk26Y0Keq7p3ky5JcnOQpa976SJIbqmrvtP7kJK/e4nIuSHJVd9/a3f+W5Kokj01S0+tTqqqS3CfJv0zH9LSeJJ+6ZvvKW9K1eXaSn+rudybJ9OdPJ/mh6f0Lk7xiWn5FkidMy53ktOn63DvJrUnuTHJOkr/s7ju7+z8yC+weu6BaAQAAYOGGCX0y+yX+iu7+xyQfrKrz1rz3u0meUlW7k3w06w9UPmnN9KE/OMY+Tz5imtEnJdmV5OY1+7wnya7uviPJdyU5NNVwTpKXT/v8QJKfraqbM7tD5UfXWeMqWMa1eXCSA0ds2z9tT5Izu/uWafm9Sc6cln8+yRdMdRxK8v3dfVeSa5I8tqo+eZqS91VJdq+zVgAAADjpTtmKkx7etXtfZtObFuWyXYdv3neCfS7KbOpUMgsSLsrsF/gkuSLJ85K8L8mr5vh7b5+mWx3Pq7r7GWs3zG4S+d+q6tTMQp+HJXlHkpdlFu785LT9md39mqp6UmZh0NfMUeu6POrSK/dlwdfm6ssu2HeCfZZ1bdalu7uqelq9IMnBJI9JcnaSq6rqrd39xqr64iR/neT9Sf5fZiEVAAAAbEtD3OkzPUPnMUl+tareldkUnidlNpUq3f3fmd318YNJfv8Y59i95s6RSzZZ0uHc8y6Q+03bzp3qeXt3d2ZTmb5k2uepSV47Lf9eZs8CWnlLvDbXJznviG3n5WPPDHpfVZ01nf+sJP86bX96ktf2zE2ZPXfoQVOtz5+eH/S1U/3/uM5aAAAA4KTbkjt9luCJSX6zu7/z7g1V9ZbcM3h5cZK3dPetR7sTp7tvzhTKLMCVSX7q7k+ESvJ1md3R84lJzqmqM7r7/Um+NskN0z7/kuQrk7w5s5DknxZUy7It69q8KMnvVdVfdPe7qmpPkudO9STJ6zIL2l4w/flH0/Z/TvLVSd5aVWcmeWCSd1TVvZLct7s/OD0M+iFJ3jhnTQAAAHDSbEnoM03F2rcV5z6Gi5K88Ihtr8ma5+J093VZ/ydDzePJVfVla9a/u7v/uqqel+Rvp20/0d23JklVXZbkL6vqjiTvTvK0aZ9vT/LS6RO+/jPJd2xBrZmmYu3binMfw1KuTXcfrKrnJPnjaVrdHUl+uLsPTru
8IMmrq+rizK7Dk6btz0vy61V1KLO7eZ7T3R+oqk/MLAhKktuSfGt337nImgEAAGCRajbLCAAAAICRDPFMHwAAAADuSegDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxoIR/Zfvrpp/eePXsWcSoAAAAAkhw4cOAD3X3GRo9fSOizZ8+e7N+/fxGnAgAAACBJVb17M8eb3gUAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADCghYQ+H33f+xZxGoCluO3Fly+7hA1Z1bpHtZXX41fedNOWnZtx6BOA7cfYvFi+n/NbSOhzl9AHWGEfuvwlyy5hQ1a17lFt5fV4+ZvfvmXnZhz6BGD7MTYvlu/n/EzvAgAAABiQ0AcAAABgQKcs6kSHd+1e1KkAWCdj787xqEuvXHYJAMAG+DecZXKnDwAAAMCAhD4AAAAAA1rY9K5dh29e1KkATqpVniJl7N0+trqPrr7sgi09P6vP9AGA7cm/4Yvj37r5udMHAAAAYEBCHwAAAIABCX0AAAAABrSQ0OfjzjxzEacBWIrTnvXMZZewIata96i28npcfP7ZW3ZuxqFPALYfY/Ni+X7Or7p70yfZu3dv79+/fwHlAAAAAJAkVXWgu/du9HjTuwAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfABbqV95007JLOKrtWhfc9uLLl13Cpqx6/QAwMqEPAAv18je/fdklHNV2rQs+dPlLll3Cpqx6/QAwMqEPAAAAwICEPgAAAAADEvoAAAAADOiUZRcAwHgedemVyy4BVsrhXbuXXQIAMCB3+gAAAAAMSOgDAAAAMCDTuwBYuKsvu2DZJfwvppyxne06fPOyS9gwU9MAYPtypw8AAADAgIQ+AAAAAAMS+gCwUBeff/aySziq7VoXnPasZy67hE1Z9foBYGTV3Zs+yd69e3v//v0LKAcAAACAJKmqA929d6PHu9MHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQNXdmz9J1YeS3Lj5cmBbOz3JB5ZdBGwxfc5OoM/ZCfQ5O4E+Zyd4YHefttGDT1lQETd2994FnQu2parar88ZnT5nJ9Dn7AT6nJ1An7MTVNX+zRxvehcAAADAgIQ+AAAAAANaVOjzyws6D2xn+pydQJ+zE+hzdgJ9zk6gz9kJNtXnC3mQMwAAAADbi+ldAAAAAAM6YehTVbur6k1VdX1VXVdV3z9t/+Zp/a6q2nvEMT9aVTdV1Y1VdcFWFQ+LMm+fV9Weqrq9qg5Or19cXvWwPsfp85+tqn+oqmur6g+q6r5rjjGes1Lm7XPjOavoOH3+vKnHD1b
VG6vqs6ftVVU/N43n11bVw5f7FcCJbaDPz6+qf18znv/4cr8COLFj9fma93+wqrqqTp/W5x7PTzi9q6rOSnJWd7+tqk5LciDJE5J0kruS/FKSZ3f3/mn/c5L8TpJHJPnsJH+W5PO7+6Pzfflw8mygz/ckeX13f+FyKob5HafP75fkL7r7zqp6YZJ093OM56yiDfT5nhjPWTHH6fP3dPdt0z7fl+Sc7r6kqr4+yfcm+fokj0zy0u5+5HKqh/XZQJ+fn9nP69+wpJJhbsfq8+6+vqp2J/nVJA9Kcl53f2Aj4/kJ7/Tp7lu6+23T8oeS3JBkV3ff0N03HuWQC5P8bnf/V3e/M8lNmf3CANvWBvocVs5x+vyN3X3ntNvVmf1ynBjPWUEb6HNYOcfp89vW7PYpmf3nVTIbz3+jZ65Oct/pFw3YtjbQ57ByjtXn09svSfLDuWePzz2ez/VMn+l/wx6W5P8fZ7ddSW5es/6eNUXDtrfOPk+S+1fV31XVW6rqy7e+Mlic4/T5tyV5w7RsPGelrbPPE+M5K+zIPq+q51fVzUm+Jcnd01uM56y0dfZ5kjy6qq6pqjdU1YNPfqWwcWv7vKouTHK4u685Yre5x/N1hz5Vde8kr0nyA0ekqzCMOfr8liSf090PS/KsJL9dVfc5GTXCZh2rz6vqx5LcmeSVy6oNFmWOPjees7KO1ufd/WPdvTuzHn/GMuuDRZijz9+W5HO7+6FJXpbkD5dQLmzI2j7P7OeU5+aegeaGrSv0qapTpwJe2d2vPcHuh5PsXrN+v2kbbGvz9Pk03eWD0/KBJG9P8vlbXyVszrH6vKqeluQbknxLf+xhb8ZzVtI8fW48Z1Wt4+eWVyb5pmnZeM5KmqfPu/u27v7wtPynSU69++G3sJ0dpc/PTnL/JNdU1bsyG7PfVlWflQ2M5+v59K5K8vIkN3T35euo+XVJnlJVn1BV90/ygCR/s47jYGnm7fOqOqOq7jUtf15mff6Ora0SNudYfV5Vj81svvDju/sjaw4xnrNy5u1z4zmr6Dh9/oA1u12Y5B+m5dcl+b/Tp748Ksm/d/ctJ61g2IB5+7yqPms6JlX1iMx+1/3gyasY5ne0Pu/uQ939md29p7v3ZDaF6+Hd/d5sYDw/ZR11fGmS/5PkUFUdnLY9N8knZHbb3BlJ/qSqDnb3Bd19XVW9Osn1md2W9D0+6YUVMFefJ/mKJD9RVXdk9ulel3T3rSe/bJjLsfr85zLr9aumn5Wu7u5LjOesqLn6PMZzVtOx+vziqnpgZr387iSXTO/9aWaf9HJTko8kefpJrRY2Zt4+f2KS76qqO5PcnuQpa+5ehu3qqH0+3a12NHOP5yf8yHYAAAAAVs9cn94FAAAAwGoQ+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAArq6o+o6oOTq/3VtXhafnDVfULy64PAGCZfGQ7ADCEqtqX5MPd/aJl1wIAsB240wcAGE5VnV9Vr5+W91XVK6rqrVX17qr6xqr6mao6VFVXVNWp037nVdVbqupAVV1ZVWct96sAANgcoQ8AsBOcneQxSR6f5LeSvKm7vyjJ7UkeNwU/L0vyxO4+L8mvJXn+sooFAFiEU5ZdAADASfCG7r6jqg4luVeSK6bth5LsSfLAJF+Y5KqqyrTPLUuoEwBgYYQ+AMBO8F9J0t13VdUd/bGHGt6V2c9DleS67n70sgoEAFg007sAAJIbk5xRVY9Okqo6taoevOSaAAA2RegDAOx43f3fSZ6Y5IVVdU2Sg0m+ZKlFAQBsko9sBwAAABiQO30AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAf0PoCynY/nhXP8AAAAASUVORK5CYII=", "text/plain": [ - "" + "" ] }, - "execution_count": 38, + "execution_count": null, 
"metadata": {}, "output_type": "execute_result" } @@ -625,14 +641,14 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pyannote.audio.tasks import OverlappedSpeechDetection\n", "osd_task = OverlappedSpeechDetection(ami, duration=2.0)\n", "\n", - "osd_model = Model.from_pretrained(\"pyannote/segmentation\")\n", + "osd_model = Model.from_pretrained(\"pyannote/segmentation\", use_auth_token=True)\n", "osd_model.task = osd_task" ] }, @@ -645,7 +661,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -669,7 +685,7 @@ " 'lstm']" ] }, - "execution_count": 40, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -687,7 +703,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -765,7 +781,7 @@ } ], "source": [ - "trainer = pl.Trainer(gpus=1, max_epochs=1)\n", + "trainer = pl.Trainer(devices=1, accelerator=\"gpu\", max_epochs=1)\n", "trainer.fit(osd_model)" ] }, @@ -778,17 +794,17 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABHYAAACaCAYAAADM+M9qAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAARiUlEQVR4nO3da6xlZ1kH8OfZnV7CpSK0lFvh1KYgF0OAClajIQLBIIJRTIhE4uVLjRcIGg0lGqLhA2owQmLUoImXipJ4I9wEAyF8oOJMaWnKJSmXUpoSKBqgcut0PX7Ye5+9Oc5M56yz3rNmrfP7JU33nLP3mvckz15nv/953+fNqgoAAAAApmcx9gAAAAAA6EewAwAAADBRgh0AAACAiRLsAAAAAEyUYAcAAABgogQ7AAAAABN1bD9PvuSSS2pnZ6fRUAAAAACOnhMnTtxdVZf2ee2+gp2dnZ04fvx4n78HAAAAgFPIzNv7vtZWLAAAAICJEuwAAAAATJRgBwAAAGCiBDsAAAAAEyXYAQAAAJgowQ4AAADARO3ruHMA2PaRz/1P/ON/3RFVY4/k4B7xXRfFK597VWTm2ENhJj71pXvizR/8THTdsG+QC89fxCuec1U87EEXDnpdAGCaBDsA9PbW43fEW4/fEQ9/8EVjD+VA/vfbJ+Nr3zwZL7/mcSbLDOYdH70r3vLhz8VlF18YGcMEhie7Lu6+59vx/TsPjZ946qMGuSYAMG2CHQB6u6+ruOzii+JDr37O2EM5kL/90Gfjd/7t1hh4YQVH3H2rgrrh1c8ZbCXYbV+8J577hg9EN4dlcgDAIPTYAaC3riIWM9i6tJ50l8kyA1rX05Db+xa5vvZglwQAJk6wA0BvXVXMINfZDaes2GFIy+Bz2GtualWxAgBLgh0A+pvJip3dVRBhssxwKmrw98did3XZoJcFACZMsANAb13V4CsSxmDFDi202Kq4vpwVOwDAmmAHgN66GrZ/yGjWk2XJDgPqqmKgw7B2pR47AMAegh0Aeptbjx2TZYZUeuwAAIdAsANAb6XHDpxWVcMeO4NeFQCYMsEOAL0tm8OOPYqD02OHFlr02FnosQMA7CHYAaC3rovIoZuIjEBDWlposVUxhZAAwB6CHQB6m0uPndztsWO2zHAa9E7eap6sVgGAJcEOAL212GoyhoWThmigqmIx8F5Fjb4BgL0EOwAcQMViBr9J9NihBT12AIDDMIOP4wCMpat59NgxWaaFroZvLq7HDgCwl2AHgN5aTFzHsZ4smy0znGX4MuwbRI8dAGAvwQ4AvXW1WUEwZXrs0EI1CD432wYVKwCwJNgBoLcWE9cxaEhLC9Wwx45aBQDWBDsA9NZi4jqGdQNoqyAYUoutihp9AwB7CXYA6K2rihnkOlsNac2WGU6LrYqp0TcAsIdgB4DelsHO9JOd9U9gFQRDqgbB5/oUOs2TAYA1wQ4AvXUVs+qxE2GyzHAq9NgBANoT7ADQ31x67OhbQgN67AAAh0GwA0Bvc+mxs558d2bLDKhrEHzqsQMA7CXYAaC35YqEGSQ7u5PlcYfBvHRVmwZOA1n3tNJjBwBYE+wA0FuLU3/GsA6nSo8dhtRoq+IidYMCADYEOwD0Vg16iIxhN9gxW2ZALXrsRCzr1VYsAGBNsANAbxWD7zQZxULfEhpotVVxGewMflkAYKIEOwD0NpceO+mkIRpotVUxUwgJAGwIdgDorevm0WPHSUO0UFVNVrRl2jYIAGwIdgDorVUPkcO2u+rIZJkBVUUsGnzSWmQ6FQsA2CXYAeBA5rAVS48dWtBjBwA4DIIdAHrrqmIGuc7u5NtkmSHpsQMAHAbBDgC9dTWPFTtrJssMqWvVYyf02AEANgQ7APQ2txU7JssMrUUPqsVCjx0AYEOwA0BvNZMVO+sGtybLDEmPHQDgMAh2AOitZrZix2SZIXVdm+BzoccOALBFsANAb3PpseNULFpotVUxrdgBALYIdgDobS49diLWK3bMlhl
OVbQJdsK2QQBgQ7ADQG+z6bEz/R+Bc1BFux47ch0AYE2wA0Bv1eg458O26bFjtsxwWm1V1GMHANgm2AGgt/n02FkFO93IA2FW9NgBAA6DYAeA3rqq3aPCpyw1T6aBrpYhzNAy9dgBADZm8HEcgLG0mrgetvWPYKrMoKqa9G9aZKpVAGCXYAeA3qrRxPWwrbdiWQXBkPTYAQAOg2AHgN4qInIG7ZM3zZNHHgiz0jVcsaNWAYA1wQ4AvbWauB62hR47NNCyx45aBQDWBDsA9NZ1NYseO+tFR+bKDKmqmqxny0wNoQCAXYIdAHqrmR13rscOQ2r1/tBjBwDYJtgBoLeuKmaQ6+ixQxNdVSwafNJa9thRrADAkmAHgN4qQo8dOI1l8Nmix47myQDAhmAHgN6WzZOnn+ykFTs00HIrlm2DAMCaYAeA3lqd+nPYcrd5sskyw6mIRs2TNfoGADYEOwD0VjPrsWOyzJCWK9qGv64eOwDANsEOAL0tt5qMPYqD02OHFlptVdRjBwDYJtgBoLe59NhxKhYtdF2brYqOOwcAtgl2AOhtLj121ipMlhlWi7fHfN5xAMAQBDsA9LJuNDyPrVh67DA8PXYAgMMg2AGgl/W2pZzB+oHdHjv2YjGgVlsVF5nRdYNfFgCYKMEOAL3MccWOXIchtdqqmHrsAABbBDsA9LIOQRYzSHbWc289dhhSq1PjFpkqFQDYJdgBoJf1ioE59E5OK3ZooKraNE/OzYo5AADBDgC91Ix67EQsV1aYLDOkpj12lCoAsCLYAaCX9balGezEiggnDTG8rqJJsKPHDgCwTbADQC+7PXbmsBcrrIJgeF2jrVhqFQDYJtgBoJc59diJiIjcbC+DQVSbrYqZoVgBgF2CHQB6qW75/xbHOY9Bjx2GtuyxM/x1rdgBALYJdgDoZb1iR48dOLWuIhYN3iALPXYAgC2CHQB6WU8r9diBU2vVYyfVKgCwRbADQC9zW7GTeuwwsIo2wadtgwDANsEOAL1smifPI9nJsL2FYVVVg9bJy4bMShUAWBPsANDLemI5k1wnFou0CoJBddVoxc5CCAkAbAh2AOhlPa/UYwdOrdWpWKnRNwCwRbADQC9z67GzyIgKk2WGU9Vmq+IiU6UCALsEOwD0MrceOxFW7DCc2n1/DH/tDI2+AYANwQ4Avez22Bl3GINx0hBD6hpuVVykHjsAwIZgB4BeNlux5hHtLDKj68YeBXPRcqviQo8dAGCLYAeAXnabJ8/kN4lVEAyp5VbFFEICAFtm8nEcgMM2txU7qSEtA2p5atxcGpYDAMMQ7ADQy9waDacVOwxotwdVk+PO1SoAsCHYAaCXmtmKnUWmk4YYjB47AMBhEewA0Mt6WjmfYMcqCIbTcqtiZs5uxRwA0J9gB4BeWq5IGIMVOwxpXUotmicvMtQqALBLsANAL+tTeVpMXEdhxQ4DqvX7o8G1MzdbIQEABDsA9LI5znnkgQzEih2GpMcOAHBYBDsA9NLyOOcx6LHDkHaDnQbJzkKPHQBgi2AHgF4q9NiB02nZY8dWLABgm2AHgF66ma3YSdtbGFDrrVhKFQBYE+wA0MtuCDKPXCcywvYWBrN5ezRYsRO2DQIAG8f2+4L7fOoFIDa/D+ayYmexWE6W/Z5jCPfetzwWq8mKnUXGfWoVAFjZV7Bzy51fiSuve2ersQAwQefPpMnOscUi3veJL/o9x6COnTf84uhji4xv3tupVQCYiSdc9uADvX5fwc5lF18Ur3re4w/0FwIwHw+44Lx4+uO+e+xhDOK6Fzwxbvj0l8ceBjNywbFFPO+Jlw1+3Z991mPjARecZ+sgAMzEQx94QbznAK/P/ZyqcPXVV9fx48cP8NcBAAAAsC0zT1TV1X1eq3kyAAAAwEQJdgAAAAAmSrADAAAAMFGCHQAAAICJEuwAAAAATJRgBwAAAGCiBDsAAAAAE5VVdfZPzvxSRNzebjhwTrgkIu4eexDQkBrnKFDnHAXqnKN
AnXMUXBIRD6yqS/u8eF/BDhwFmXm8qq4eexzQihrnKFDnHAXqnKNAnXMUHLTObcUCAAAAmCjBDgAAAMBECXbg//uLsQcAjalxjgJ1zlGgzjkK1DlHwYHqXI8dAAAAgImyYgcAAABgogQ7HCmZeXlmvj8zP5aZt2bmK1Zf/5nVn7vMvHrr+TuZ+Y3MvGn135+NN3o4O2eo8z/MzE9k5kcz818y8yFbr3l1Zt6WmZ/MzOePNng4C/utcfdypugMdf77qxq/KTPfk5mPWn09M/ONq3v5RzPz6eP+BHD/etT5szPzK1v3898d9yeA+3e6Ot/6/m9kZmXmJas/7/t+bisWR0pmPjIiHllVN2bmgyPiRET8ZERURHQR8ecR8ZtVdXz1/J2IeHtVPWWcEcP+naHOHxMR76uqk5n5+oiIqvrtzHxSRLwlIp4ZEY+KiP+IiMdX1X2j/ABwP3rU+E64lzMxZ6jzz1fVV1fP+fWIeFJVXZuZL4iIX4uIF0TEsyLiT6rqWeOMHs5Ojzp/diw/q79wpCHDvp2uzqvqY5l5eUS8OSK+NyKeUVV397mfW7HDkVJVd1XVjavHX4uIj0fEo6vq41X1yXFHB8M4Q52/p6pOrp52QywnwRERL46If6iqb1XVZyLitliGPHBO6lHjMDlnqPOvbj3tgbH8x6mI5b38b2rphoh4yGoyAeesHnUOk3O6Ol99+48j4rfiO2t83/dzwQ5H1upfcJ8WEf95P0+9IjM/kpkfyMwfbj8yGM4Z6vwXI+Jdq8ePjog7tr73+dj8soFz2lnWeIR7ORO2t84z83WZeUdEvCwi1ltR3MuZtLOs84iIazLz5sx8V2Y++fBHCv1t13lmvjgi7qyqm/c8bd/3c8EOR1JmPigi/ikiXrnnXwT2uisiHltVT4uIV0XE32fmxYcxRjio09V5Zr4mIk5GxPVjjQ2GsI8ady9nsk5V51X1mqq6PJY1/qtjjg+GsI86vzEiHldVT42IN0XEv44wXOhlu85j+TnluvjO0LI3wQ5HTmaeH8s31PVV9c9neu5qa8qXV49PRMSnIuLx7UcJB3O6Os/Mn4+IF0bEy2rTZO3OiLh86+WPWX0Nzln7qXH3cqbqLD6zXB8RP7167F7OJO2nzqvqq1V1z+rxOyPi/HXDWTiXnaLOr4yIKyLi5sz8bCzv2Tdm5iOix/1csMORkpkZEX8ZER+vqjecxfMvzczzVo+/JyKuiohPtx0lHMzp6jwzfyyWe3hfVFVf33rJ2yLipZl5YWZeEcs6//Bhjhn2Y7817l7OFJ2hzq/aetqLI+ITq8dvi4iXr05T+YGI+EpV3XVoA4Ye9lvnmfmI1WsiM58Zy/nslw9vxLB/p6rzqrqlqh5eVTtVtRPL7VZPr6ovRI/7+bG2PwKcc34oIn4uIm7JzJtWX7suIi6M5XLOSyPiHZl5U1U9PyJ+JCJ+LzPvjeWpWddW1X8f/rBhX05X52+MZa2/d/WZ6Iaquraqbs3Mt0bEx2K5LPRXnIjFOW5fNR7u5UzT6er8lzLzCbGs5dsj4trV994ZyxNUbouIr0fELxzqaKGf/db5SyLilzPzZER8IyJeurUCGc5Vp6zz1aqzU9n3/dxx5wAAAAATZSsWAAAAwEQJdgAAAAAmSrADAAAAMFGCHQAAAICJEuwAAAAATJRgBwA452XmwzLzptV/X8jMO1eP78nMPx17fAAAY3HcOQAwKZn52oi4p6r+aOyxAACMzYodAGCyMvPZmfn21ePXZuZfZ+YHM/P2zPypzPyDzLwlM9+dmeevnveMzPxAZp7IzH/PzEeO+1MAAPQn2AEA5uTKiPjRiHhRRPxdRLy/qr4vIr4RET++CnfeFBEvqapnRMRfRcTrxhosAMBBHRt7AAAAA3pXVd2bmbdExHkR8e7V12+JiJ2IeEJEPCUi3puZsXrOXSOMEwBgEIIdAGBOvhURUVVdZt5bm2aCXSw/92RE3FpV14w1QACAIdmKBQAcJZ+
MiEsz85qIiMw8PzOfPPKYAAB6E+wAAEdGVX07Il4SEa/PzJsj4qaI+MFRBwUAcACOOwcAAACYKCt2AAAAACZKsAMAAAAwUYIdAAAAgIkS7AAAAABMlGAHAAAAYKIEOwAAAAATJdgBAAAAmCjBDgAAAMBE/R8Qbrr6AHHA/wAAAABJRU5ErkJggg==", "text/plain": [ - "" + "" ] }, - "execution_count": 46, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -800,17 +816,17 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAABH0AAACsCAYAAADmO9AtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQvklEQVR4nO3de5Ald1k38O8jiVeCqIkxLquLKQSDQiArF68RL6HEIpQikNL3BUypUfECoihWmY2IgkIoxLK8YYmKCgoqoiREBcTyzau7uMmaxGi4GdaAQCwDEjUhj3+cTjFZ9zJn5syeOb/5fKpObXef7t5npp/67cx3+9enujsAAAAAjOXjll0AAAAAAIsn9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGNBQoU9VPaGquqoeNK3vmdZ/cs0+p1fVHVX189P6vqp69jHO99GqOrjmtaeqzq+q10/vP62q3n/EPudM7z21qv5pej11zTkvqqpDVXVtVV1RVadP28+tqqunc+yvqkds3Xfq5DvZ12bN33ltVd0wfc+fsOa9T6+qq6brc1VVfdq0/VOr6o+r6pqquq6qnr7mmBdW1d9Prycv/JsEAAAACzRU6JPkoiR/Nf15t3cmedya9W9Oct06z3d7d5+75vWuo+zzqiP2ub6qPj3JpUkemeQRSS6tqk+rqlOSvDTJV3X3Q5Jcm+QZ03l+Jsll3X1ukh+f1kdyUq9NVT00yYuSXNjdX5Dk8UleVFUPmXb5kSR/3t0PSPLn03qSfE+S67v7oUnOT/Liqvr4qnpckocnOTez6/rsqrrPOmsFAACAk26Y0Keq7p3ky5JcnOQpa976SJIbqmrvtP7kJK/e4nIuSHJVd9/a3f+W5Kokj01S0+tTqqqS3CfJv0zH9LSeJJ+6ZvvKW9K1eXaSn+rudybJ9OdPJ/mh6f0Lk7xiWn5FkidMy53ktOn63DvJrUnuTHJOkr/s7ju7+z8yC+weu6BaAQAAYOGGCX0y+yX+iu7+xyQfrKrz1rz3u0meUlW7k3w06w9UPmnN9KE/OMY+Tz5imtEnJdmV5OY1+7wnya7uviPJdyU5NNVwTpKXT/v8QJKfraqbM7tD5UfXWeMqWMa1eXCSA0ds2z9tT5Izu/uWafm9Sc6cln8+yRdMdRxK8v3dfVeSa5I8tqo+eZqS91VJdq+zVgAAADjpTtmKkx7etXtfZtObFuWyXYdv3neCfS7KbOpUMgsSLsrsF/gkuSLJ85K8L8mr5vh7b5+mWx3Pq7r7GWs3zG4S+d+q6tTMQp+HJXlHkpdlFu785LT9md39mqp6UmZh0NfMUeu6POrSK/dlwdfm6ssu2HeCfZZ1bdalu7uqelq9IMnBJI9JcnaSq6rqrd39xqr64iR/neT9Sf5fZiEVAAAAbEtD3OkzPUPnMUl+tareldkUnidlNpUq3f3fmd318YNJfv8Y59i95s6RSzZZ0uHc8y6Q
+03bzp3qeXt3d2ZTmb5k2uepSV47Lf9eZs8CWnlLvDbXJznviG3n5WPPDHpfVZ01nf+sJP86bX96ktf2zE2ZPXfoQVOtz5+eH/S1U/3/uM5aAAAA4KTbkjt9luCJSX6zu7/z7g1V9ZbcM3h5cZK3dPetR7sTp7tvzhTKLMCVSX7q7k+ESvJ1md3R84lJzqmqM7r7/Um+NskN0z7/kuQrk7w5s5DknxZUy7It69q8KMnvVdVfdPe7qmpPkudO9STJ6zIL2l4w/flH0/Z/TvLVSd5aVWcmeWCSd1TVvZLct7s/OD0M+iFJ3jhnTQAAAHDSbEnoM03F2rcV5z6Gi5K88Ihtr8ma5+J093VZ/ydDzePJVfVla9a/u7v/uqqel+Rvp20/0d23JklVXZbkL6vqjiTvTvK0aZ9vT/LS6RO+/jPJd2xBrZmmYu3binMfw1KuTXcfrKrnJPnjaVrdHUl+uLsPTru8IMmrq+rizK7Dk6btz0vy61V1KLO7eZ7T3R+oqk/MLAhKktuSfGt337nImgEAAGCRajbLCAAAAICRDPFMHwAAAADuSegDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxoIR/Zfvrpp/eePXsWcSoAAAAAkhw4cOAD3X3GRo9fSOizZ8+e7N+/fxGnAgAAACBJVb17M8eb3gUAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADAgoQ8AAADAgIQ+AAAAAAMS+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAAAAAxI6AMAAAAwIKEPAAAAwICEPgAAAAADEvoAAAAADEjoAwAAADCghYQ+H33f+xZxGoCluO3Fly+7hA1Z1bpHtZXX41fedNOWnZtx6BOA7cfYvFi+n/NbSOhzl9AHWGEfuvwlyy5hQ1a17lFt5fV4+ZvfvmXnZhz6BGD7MTYvlu/n/EzvAgAAABiQ0AcAAABgQKcs6kSHd+1e1KkAWCdj787xqEuvXHYJAMAG+DecZXKnDwAAAMCAhD4AAAAAA1rY9K5dh29e1KkATqpVniJl7N0+trqPrr7sgi09P6vP9AGA7cm/4Yvj37r5udMHAAAAYEBCHwAAAIABCX0AAAAABrSQ0OfjzjxzEacBWIrTnvXMZZewIata96i28npcfP7ZW3ZuxqFPALYfY/Ni+X7Or7p70yfZu3dv79+/fwHlAAAAAJAkVXWgu/du9HjTuwAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfABbqV95007JLOKrtWhfc9uLLl13Cpqx6/QAwMqEPAAv18je/fdklHNV2rQs+dPlLll3Cpqx6/QAwMqEPAAAAwICEPgAAAAADEvoAAAAADOiUZRcAwHgedemVyy4BVsrhXbuXXQIAMCB3+gAAAAAMSOgDAAAAMCDTuwBYuKsvu2DZJfwvppyxne06fPOyS9gwU9MAYPtypw8AAADAgIQ+AAAAAAMS+gCwUBeff/aySziq7VoXnPasZy67hE1Z9foBYGTV3Zs+yd69e3v//v0LKAcAAACAJKmqA929d6PHu9MHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAA
AIABCX0AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAQl9AAAAAAYk9AEAAAAYkNAHAAAAYEBCHwAAAIABCX0AAAAABiT0AQAAABiQ0AcAAABgQNXdmz9J1YeS3Lj5cmBbOz3JB5ZdBGwxfc5OoM/ZCfQ5O4E+Zyd4YHefttGDT1lQETd2994FnQu2parar88ZnT5nJ9Dn7AT6nJ1An7MTVNX+zRxvehcAAADAgIQ+AAAAAANaVOjzyws6D2xn+pydQJ+zE+hzdgJ9zk6gz9kJNtXnC3mQMwAAAADbi+ldAAAAAAM6YehTVbur6k1VdX1VXVdV3z9t/+Zp/a6q2nvEMT9aVTdV1Y1VdcFWFQ+LMm+fV9Weqrq9qg5Or19cXvWwPsfp85+tqn+oqmur6g+q6r5rjjGes1Lm7XPjOavoOH3+vKnHD1bVG6vqs6ftVVU/N43n11bVw5f7FcCJbaDPz6+qf18znv/4cr8COLFj9fma93+wqrqqTp/W5x7PTzi9q6rOSnJWd7+tqk5LciDJE5J0kruS/FKSZ3f3/mn/c5L8TpJHJPnsJH+W5PO7+6Pzfflw8mygz/ckeX13f+FyKob5HafP75fkL7r7zqp6YZJ093OM56yiDfT5nhjPWTHH6fP3dPdt0z7fl+Sc7r6kqr4+yfcm+fokj0zy0u5+5HKqh/XZQJ+fn9nP69+wpJJhbsfq8+6+vqp2J/nVJA9Kcl53f2Aj4/kJ7/Tp7lu6+23T8oeS3JBkV3ff0N03HuWQC5P8bnf/V3e/M8lNmf3CANvWBvocVs5x+vyN3X3ntNvVmf1ynBjPWUEb6HNYOcfp89vW7PYpmf3nVTIbz3+jZ65Oct/pFw3YtjbQ57ByjtXn09svSfLDuWePzz2ez/VMn+l/wx6W5P8fZ7ddSW5es/6eNUXDtrfOPk+S+1fV31XVW6rqy7e+Mlic4/T5tyV5w7RsPGelrbPPE+M5K+zIPq+q51fVzUm+Jcnd01uM56y0dfZ5kjy6qq6pqjdU1YNPfqWwcWv7vKouTHK4u685Yre5x/N1hz5Vde8kr0nyA0ekqzCMOfr8liSf090PS/KsJL9dVfc5GTXCZh2rz6vqx5LcmeSVy6oNFmWOPjees7KO1ufd/WPdvTuzHn/GMuuDRZijz9+W5HO7+6FJXpbkD5dQLmzI2j7P7OeU5+aegeaGrSv0qapTpwJe2d2vPcHuh5PsXrN+v2kbbGvz9Pk03eWD0/KBJG9P8vlbXyVszrH6vKqeluQbknxLf+xhb8ZzVtI8fW48Z1Wt4+eWVyb5pmnZeM5KmqfPu/u27v7wtPynSU69++G3sJ0dpc/PTnL/JNdU1bsyG7PfVlWflQ2M5+v59K5K8vIkN3T35euo+XVJnlJVn1BV90/ygCR/s47jYGnm7fOqOqOq7jUtf15mff6Ora0SNudYfV5Vj81svvDju/sjaw4xnrNy5u1z4zmr6Dh9/oA1u12Y5B+m5dcl+b/Tp748Ksm/d/ctJ61g2IB5+7yqPms6JlX1iMx+1/3gyasY5ne0Pu/uQ939md29p7v3ZDaF6+Hd/d5sYDw/ZR11fGmS/5PkUFUdnLY9N8knZHbb3BlJ/qSqDnb3Bd19XVW9Osn1md2W9D0+6YUVMFefJ/mKJD9RVXdk9ulel3T3rSe/bJjLsfr85zLr9aumn5Wu7u5LjOesqLn6PMZzVtOx+vziqnpgZr387iSXTO/9aWaf9HJTko8kefpJrRY2Zt4+f2KS76qqO5PcnuQpa+5ehu3qqH0+3a12NHOP5yf8yHYAAAAAVs9cn94FAAAAwGoQ+gAAAAAMSOgDAAAAMCChDwAAAMCAhD4AAAAAAxL6AAArq6o+o6oOTq/3VtXhafnDVfULy64PAGCZfGQ7ADCEqtqX5MPd/aJl1wIAsB240wcAGE5VnV9Vr5+W91XVK6rqrVX17qr6xqr6mao6
VFVXVNWp037nVdVbqupAVV1ZVWct96sAANgcoQ8AsBOcneQxSR6f5LeSvKm7vyjJ7UkeNwU/L0vyxO4+L8mvJXn+sooFAFiEU5ZdAADASfCG7r6jqg4luVeSK6bth5LsSfLAJF+Y5KqqyrTPLUuoEwBgYYQ+AMBO8F9J0t13VdUd/bGHGt6V2c9DleS67n70sgoEAFg007sAAJIbk5xRVY9Okqo6taoevOSaAAA2RegDAOx43f3fSZ6Y5IVVdU2Sg0m+ZKlFAQBsko9sBwAAABiQO30AAAAABiT0AQAAABiQ0AcAAABgQEIfAAAAgAEJfQAAAAAGJPQBAAAAGJDQBwAAAGBAQh8AAACAAf0PoCynY/nhXP8AAAAASUVORK5CYII=", "text/plain": [ - "" + "" ] }, - "execution_count": 44, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -838,7 +854,7 @@ "We also benefit from all the nice things [`pytorch-lightning`](ttps://pytorch-lightning.readthedocs.io) has to offer (like multi-gpu training, for instance).\n", "\n", "```python\n", - "trainer = Trainer(gpus=4, strategy='ddp')\n", + "trainer = Trainer(devices=4, accelerator=\"gpu\", strategy='ddp')\n", "trainer.fit(model)\n", "```\n", "\n", @@ -860,21 +876,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" } }, "nbformat": 4, diff --git a/tutorials/voice_activity_detection.ipynb b/tutorials/voice_activity_detection.ipynb index 1b80bd391..df2cadd15 100644 --- a/tutorials/voice_activity_detection.ipynb +++ b/tutorials/voice_activity_detection.ipynb @@ -273,7 +273,7 @@ ], "source": [ "import pytorch_lightning as pl\n", - "trainer = pl.Trainer(gpus=1, max_epochs=2)\n", + "trainer = pl.Trainer(devices=1, accelerator=\"gpu\", max_epochs=2)\n", "trainer.fit(model)" ] }, diff --git a/version.txt b/version.txt index 38f77a65b..4a36342fc 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -2.0.1 +3.0.0