Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong committed Sep 14, 2023
1 parent fed039b commit 8b3923a
Showing 1 changed file with 167 additions and 0 deletions.
167 changes: 167 additions & 0 deletions source/notebooks/find_all_thai_rhyming_words.ipynb
@@ -0,0 +1,167 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# Find all Thai rhyming words from Thai word\n",
"\n"
],
"metadata": {
"id": "K9ODYQZM4GAN"
}
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "s3hYEQjP2J_e",
"outputId": "74d31fda-c63e-4512-d127-fd65c22c36a3"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: pythainlp in /usr/local/lib/python3.10/dist-packages (4.1.0b4)\n",
"Requirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.10/dist-packages (from pythainlp) (2.31.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.2.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2023.7.22)\n",
"Collecting python-crfsuite\n",
" Downloading python_crfsuite-0.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (993 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m993.5/993.5 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: python-crfsuite\n",
"Successfully installed python-crfsuite-0.9.9\n"
]
}
],
"source": [
"!pip install --pre pythainlp\n",
"!pip install python-crfsuite"
]
},
{
"cell_type": "code",
"source": [
"from pythainlp.corpus import thai_words\n",
"from pythainlp.tokenize import syllable_tokenize"
],
"metadata": {
"id": "-RnAwSB62Nd9"
},
"execution_count": 16,
"outputs": []
},
{
"cell_type": "code",
"source": [
"all_thai_words_dict = [i for i in list(thai_words()) if len(syllable_tokenize(i))==1]"
],
"metadata": {
"id": "KJpJyT-T2RsT"
},
"execution_count": 17,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from pythainlp.khavee import KhaveeVerifier\n",
"kv = KhaveeVerifier()"
],
"metadata": {
"id": "2eBgB7KI2d_v"
},
"execution_count": 18,
"outputs": []
},
{
"cell_type": "code",
"source": [
"all_thai_words_dict[0]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "R2NSwqxT25vh",
"outputId": "570f4407-4741-42e9-8f07-e322189df2ab"
},
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'เทอญ'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"source": [
"word=\"จีบ\"\n",
"list_sumpus=[]\n",
"for i in all_thai_words_dict:\n",
" try:\n",
" if kv.is_sumpus(word,i) and i!=word:\n",
" list_sumpus.append(i)\n",
" except:\n",
" pass\n",
"print(list_sumpus)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uu3fCa8Z2lGR",
"outputId": "53dd7ed0-e9c6-487f-c9e2-f1159ac31e27"
},
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['กลีบ', 'อีฟ', 'ถีบ', 'รีฟ', 'ตีบ', 'ชีพ', 'หลีบ', 'บีบ', 'ตี้บ', 'ลีบ', 'ทวีป', 'งีบ', 'หีบ', 'คีบ', 'ปี๊บ', 'หนีบ', 'รีบ', 'ทีป', 'จี๊ป', 'ปีบ', 'ครีบ', 'กีบ']\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "mbqluTsh23HF"
},
"execution_count": null,
"outputs": []
}
]
}

0 comments on commit 8b3923a

Please sign in to comment.