Skip to content

Commit

Permalink
More Logos RMAC expansion fixes
Browse files Browse the repository at this point in the history
Also, the unused expansion pattern detection was faulty - fix it and
adjust patterns accordingly
  • Loading branch information
schierlm committed Feb 15, 2024
1 parent 786bdc9 commit bfde314
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ private static String tagOne(String str, Pattern pattern, String languageCode) {

protected static String convertMorphology(String rmac) {
rmac = rmac.replaceFirst("^(S-[123])[SP]([NVGDA][SP][MFN](-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?)$", "$1$2");
Matcher m = Utils.compilePattern("([NARCDTKIXQFSP])(-([123]?)([NVGDA][SP][MFN]?))?(?:-([PLT]|[PL]G|LI|NUI))?(-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?").matcher(rmac);
Matcher m = Utils.compilePattern("([NARCDTKIXQFSP])(-([123]?)([NVGDA][SP][MFN]?))?(?:-([PLT]|[PL]G|LI|NUI))?(?:-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?").matcher(rmac);
if (m.matches()) {
if (rmac.startsWith("N-LI"))
return "XL";
Expand Down Expand Up @@ -590,7 +590,7 @@ else if (flags.length() == 2 && !cops.isEmpty())
case 'T': // @D[ADGNV][DPS][FMN]
return "D" + flags;
case 'Q': // correlative or interrogative -> RK or RI
return "RX" + person + flags;
return person.isEmpty() ? "R" : "R?" + person + flags;
case 'R': // @R[CDFIKNPRSX][123][ADGNV][DPS][FMN][AP]
case 'X':
case 'F':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ public class RMACConversionTest {
};

private static final String[] LOGOS_PATTERNS = {
"J[ADGNV?][DPS?][FMN?][COPS?]", // Adjective
"B[CEIKNPSX?]", // Adverb
"D[ADGNV?][DPS?][FMN?]", // Article
"J[ADGNV?][PS?][FMN?][COS?]", // Adjective "J[ADGNV?][DPS?][FMN?][COPS?]"
"B[CIKNS?]", // Adverb "B[CEIKNPSX?]"
"D[ADGNV?][PS?][FMN?]", // Article "D[ADGNV?][DPS?][FMN?]"
"I", // Interjection
"C[[ACADALAMANAPARATAZLALCLDLILKLMLNLTLXSCSE??]]", // Conjunction
"T[CEIKNPSX?]", // Particle
"C", // Conjunction C[[ACADALAMANAPARATAZLALCLDLILKLMLNLTLXSCSE??]]
"T[CIN?]", // Particle "T[CEIKNPSX?]"
"P", // Preposition
"R[CDFIKNPRSX][123?][ADGNV?][DPS?][FMN?][AP?]", // Pronoun
"N[ADGNV?][DPS?][FMN?][COPS?]", // Noun
"R[CDFIKPRSX?][123?][ADGNV?][PS?][FMN?]", // Pronoun "R[CDFIKNPRSX?][123?][ADGNV?][DPS?][FMN?][AP?]"
"N[ADGNV?][PS?][FMN?][COS?]", // Noun "N[ADGNV?][DPS?][FMN?][COPS?]"
"X[FLNOP]", // Indeclinable
"V[AFILPRT?][AMPU?][IMNOPS][123?][DPS?][ADGNV?][FMN?]", // Verb
"V[AFILPR?][AMPU?][IMNOPS][123][PS]", "V[AFILPRT?][AMPU?][IMNOPS]?[DPS?][ADGNV?][FMN?]", // Verb "V[AFILPRT?][AMPU?][IMNOPS][123?][DPS?][ADGNV?][FMN?]"
};

protected static List<String> computePatterns() {
Expand Down Expand Up @@ -116,7 +116,7 @@ public void testLogosConversion() {
Set<String> logosExpansions = new HashSet<>(), unusedExpansions = new HashSet<>();
for (String logosPattern : LOGOS_PATTERNS) {
for (String expanded : expandPattern(logosPattern)) {
if (!logosPattern.contains("?")) {
if (!expanded.contains("?")) {
unusedExpansions.add(expanded);
} else {
expanded = expanded.replaceAll("\\?+$", "");
Expand Down Expand Up @@ -146,6 +146,8 @@ private void testLogosWord(String rmac, String logos) {
public void testLogosRegressions() {
    // Regression table: each row is { RMAC input, expected Logos code }.
    // Rows are handed to testLogosWord(rmac, logos) in the order listed.
    // NOTE(review): testLogosWord's body is not visible from here; presumably
    // it asserts that converting the RMAC code yields the Logos code - confirm.
    final String[][] regressionCases = {
        { "N-GSM-P", "NGSM:XP" },
        { "V-XXM-2P", "V??M2P" },
        { "Q", "R" },
        { "N-NPN-C", "NNPNC" }
    };
    for (String[] regressionCase : regressionCases) {
        testLogosWord(regressionCase[0], regressionCase[1]);
    }
}

}

0 comments on commit bfde314

Please sign in to comment.