Skip to content

Commit

Permalink
allows installing as python library with pip
Browse files Browse the repository at this point in the history
  • Loading branch information
ppisljar committed Aug 5, 2023
1 parent d3fbaae commit ae3a17c
Show file tree
Hide file tree
Showing 17 changed files with 58 additions and 4 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
slovene_g2p.egg-info
build
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
recursive-include slovene_g2p/resources/ *
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,18 @@
# slovene_g2p
A converter that converts Slovene words to their IPA and/or SAMPA transcriptions.

## installation

pip install .

## usage

```
from slovene_g2p.SloveneG2P import SloveneG2P
g2p = SloveneG2P("ipa_symbol", "cjvt_ipa_detailed_representation", "phoneme_string")
g2p.convert_to_phonetic_transcription(word="govoriti", msd_sl="Ggdd-em", morphological_pattern_code="G1.2.d")
```

The phoneme option can be either `"ipa_symbol"` or `"sampa_symbol"`, and the representation option can be one of `"cjvt_ipa_detailed_representation"`, `"cjvt_ipa_robust_representation"`, `"cjvt_sampa_detailed_representation"`, or `"cjvt_sampa_robust_representation"`.

Both `msd_sl` and `morphological_pattern_code` are available in Sloleks 3.0 and are provided by the `classla` Python package.
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nltk>=3.6.7
classla>=1.1.0
reldi-tokeniser>=1.0.1
11 changes: 11 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from setuptools import setup, find_packages

# Packaging metadata for the ``slovene_g2p`` distribution.
# ``include_package_data=True`` tells setuptools to ship the non-Python files
# matched by MANIFEST.in together with the discovered packages.
setup(
    name="slovene_g2p",
    version="0.1",
    packages=find_packages(),
    include_package_data=True,
    # No external dependencies are declared here yet.
    install_requires=[],
)
4 changes: 4 additions & 0 deletions slovene_g2p.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Metadata-Version: 2.1
Name: slovene-g2p
Version: 0.1
License-File: LICENSE
14 changes: 14 additions & 0 deletions slovene_g2p.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
LICENSE
MANIFEST.in
README.md
setup.py
slovene_g2p.egg-info/PKG-INFO
slovene_g2p.egg-info/SOURCES.txt
slovene_g2p.egg-info/dependency_links.txt
slovene_g2p.egg-info/top_level.txt
slovene_g2p/resources/SloveneG2P_phoneme_set.json
slovene_g2p/resources/schwa_rules.tsv
slovene_g2p/resources/table_of_consonant_phonemes.tsv
slovene_g2p/resources/table_of_obstruent_conversions.tsv
slovene_g2p/resources/table_of_other_symbols.tsv
slovene_g2p/resources/table_of_vowel_phonemes.tsv
1 change: 1 addition & 0 deletions slovene_g2p.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions slovene_g2p.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

9 changes: 5 additions & 4 deletions SloveneG2P.py → slovene_g2p/SloveneG2P.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import json
import os
from collections import defaultdict as dd

current_folder = os.path.dirname(__file__)

class SloveneG2P:

def __init__(self, representation_option, phoneme_set_option, output_option):
self.phoneme_set_file_path = "./resources/SloveneG2P_phoneme_set.json"
self.conversion_file_path = "./resources/table_of_obstruent_conversions.tsv"
self.phoneme_set_file_path = os.path.join(current_folder, "resources/SloveneG2P_phoneme_set.json")
self.conversion_file_path = os.path.join(current_folder, "resources/table_of_obstruent_conversions.tsv")
self.representation_option = representation_option
self.phoneme_set_option = phoneme_set_option

Expand All @@ -32,15 +34,14 @@ def __init__(self, representation_option, phoneme_set_option, output_option):

# GET LIST OF SCHWA RULES
self.set_schwa_combinations = set()
file_with_schwa_rules = open("./resources/schwa_rules.tsv", "r", encoding="UTF-8").readlines()
file_with_schwa_rules = open(os.path.join(current_folder, "resources/schwa_rules.tsv"), "r", encoding="UTF-8").readlines()
for line in file_with_schwa_rules:
all_info = line.strip("\n").split("\t")
morph_code = all_info[0]
morph_example = all_info[1]
relevant_msds = all_info[2]
for relevant_msd in relevant_msds.split(", "):
schwa_combination = f"{morph_code} ~ {relevant_msd}"
print(schwa_combination)
self.set_schwa_combinations.add(schwa_combination)

# RESOURCE FUNCTION - LIST OF VOWEL GRAPHEMES
Expand Down
Empty file added slovene_g2p/__init__.py
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit ae3a17c

Please sign in to comment.