diff --git a/source/notebooks/find_all_thai_rhyming_words.ipynb b/source/notebooks/find_all_thai_rhyming_words.ipynb new file mode 100644 index 0000000..9f58ac5 --- /dev/null +++ b/source/notebooks/find_all_thai_rhyming_words.ipynb @@ -0,0 +1,167 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Find all Thai words that rhyme with a given Thai word\n", + "\n" + ], + "metadata": { + "id": "K9ODYQZM4GAN" + } + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s3hYEQjP2J_e", + "outputId": "74d31fda-c63e-4512-d127-fd65c22c36a3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: pythainlp in /usr/local/lib/python3.10/dist-packages (4.1.0b4)\n", + "Requirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.10/dist-packages (from pythainlp) (2.31.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2023.7.22)\n", + "Collecting python-crfsuite\n", + " Downloading python_crfsuite-0.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (993 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m993.5/993.5 kB\u001b[0m \u001b[31m6.6 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: python-crfsuite\n",
+            "Successfully installed python-crfsuite-0.9.9\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Versions pinned to the ones reported in this cell's recorded output so reruns are reproducible.\n",
+        "%pip install pythainlp==4.1.0b4\n",
+        "%pip install python-crfsuite==0.9.9"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from pythainlp.corpus import thai_words\n",
+        "from pythainlp.khavee import KhaveeVerifier\n",
+        "from pythainlp.tokenize import syllable_tokenize"
+      ],
+      "metadata": {
+        "id": "-RnAwSB62Nd9"
+      },
+      "execution_count": 16,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Keep only the thai_words() entries that syllable_tokenize splits into exactly one piece.\n",
+        "# NOTE: despite its name this is a list; the name is kept because later cells use it.\n",
+        "all_thai_words_dict = [w for w in thai_words() if len(syllable_tokenize(w)) == 1]"
+      ],
+      "metadata": {
+        "id": "KJpJyT-T2RsT"
+      },
+      "execution_count": 17,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Verifier whose is_sumpus() method is used below to test word pairs.\n",
+        "kv = KhaveeVerifier()"
+      ],
+      "metadata": {
+        "id": "2eBgB7KI2d_v"
+      },
+      "execution_count": 18,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Quick look at one entry of the filtered list.\n",
+        "all_thai_words_dict[0]"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 35
+        },
+        "id": "R2NSwqxT25vh",
+        "outputId": "570f4407-4741-42e9-8f07-e322189df2ab"
+      },
+      "execution_count": 19,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'เทอญ'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 19
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Collect every entry of all_thai_words_dict for which\n",
+        "# kv.is_sumpus(word, candidate) is true, excluding the query word itself.\n",
+        "word = \"จีบ\"\n",
+        "list_sumpus = []\n",
+        "skipped_words = []  # candidates on which kv.is_sumpus raised\n",
+        "for candidate in all_thai_words_dict:\n",
+        "    if candidate == word:\n",
+        "        continue\n",
+        "    try:\n",
+        "        if kv.is_sumpus(word, candidate):\n",
+        "            list_sumpus.append(candidate)\n",
+        "    except Exception:\n",
+        "        # Best-effort scan: record the failing entry instead of discarding it\n",
+        "        # silently. Unlike a bare except, this lets KeyboardInterrupt and\n",
+        "        # SystemExit propagate, so the cell can still be interrupted.\n",
+        "        skipped_words.append(candidate)\n",
+        "print(list_sumpus)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "uu3fCa8Z2lGR",
+        "outputId": "53dd7ed0-e9c6-487f-c9e2-f1159ac31e27"
+      },
+      "execution_count": 21,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "['กลีบ', 'อีฟ', 
'ถีบ', 'รีฟ', 'ตีบ', 'ชีพ', 'หลีบ', 'บีบ', 'ตี้บ', 'ลีบ', 'ทวีป', 'งีบ', 'หีบ', 'คีบ', 'ปี๊บ', 'หนีบ', 'รีบ', 'ทีป', 'จี๊ป', 'ปีบ', 'ครีบ', 'กีบ']\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "mbqluTsh23HF" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file