Skip to content

Commit

Permalink
Support RMAC forms from OpenGNT / TAGNT
Browse files (browse the repository at this point in the history)
  • Loading branch information
schierlm committed Feb 2, 2024
1 parent 7519c27 commit fec4d4f
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,8 @@ private static String tagOne(String str, Pattern pattern, String languageCode) {
}

protected static String convertMorphology(String rmac) {
Matcher m = Utils.compilePattern("([NARCDTKIXQFSP])(-([123]?)([NVGDA][SP][MFN]?))?(-(S|C|ABB|I|N|K|ATT))?").matcher(rmac);
rmac = rmac.replaceFirst("^(S-[123])[SP]([NVGDA][SP][MFN](-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?)$", "$1$2");
Matcher m = Utils.compilePattern("([NARCDTKIXQFSP])(-([123]?)([NVGDA][SP][MFN]?))?(?:-(?:[PLT]|[PL]G|LI|NUI))?(-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?").matcher(rmac);
if (m.matches()) {
char type = m.group(1).charAt(0);
String person = m.group(3);
Expand Down Expand Up @@ -584,7 +585,7 @@ else if (flags.length() == 2 && cops != null)
return "R" + type + person + flags;
}
} else if (rmac.startsWith("V-")) { // @V[AFILPRT][AMPU][IMNOPS][123][DPS][ADGNV][FMN]
Matcher mm = Utils.compilePattern("V-2?([PIFARLX])([AMPEDONQX][ISOMNP])(-([123][SP])|-([NGDAV][SPD][MFN]))?(-ATT)?").matcher(rmac);
Matcher mm = Utils.compilePattern("V-2?([PIFARLX])([AMPEDONQX][ISOMNP])(-([123][SP])|-([NGDAV][SPD][MFN]))?(-ATT|-ARAM|-HEB)?").matcher(rmac);
if (!mm.matches())
throw new RuntimeException(rmac);
String tense = mm.group(1);
Expand All @@ -603,8 +604,10 @@ else if (flags.length() == 2 && cops != null)
}
return "V" + tense + voice + "" + flags.charAt(1) + opt;
} else {
if (rmac.endsWith("-ATT") || rmac.endsWith("-ABB"))
if (rmac.endsWith("-ATT") || rmac.endsWith("-ABB") || rmac.endsWith("-HEB"))
rmac = rmac.substring(0, rmac.length()-4);
else if (rmac.endsWith("-ARAM"))
rmac = rmac.substring(0, rmac.length()-5);
switch (rmac) {
case "ADV": // @B[CEIKNPSX]
return "B";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,30 @@ public class RMACConversionTest {
"ADV", "CONJ", "COND", "PRT", "PREP", "INJ",
"ARAM", "HEB", "N-PRI", "A-NUI", "N-LI", "N-OI",
"V-[PIFARLX][AMPEDONQX][ISOMNP]",
"V-2[FARL][AMPEDONQX][ISOMNP]",
"V-2[PFARL][AMPEDONQX][ISOMNP]",
"V-[PIFARLX][AMPEDONQX][ISOMNP]-[NGDAV][SPD][MFN]",
"V-2[FARL][AMPEDONQX][ISOMNP]-[NGDAV][SPD][MFN]",
"V-2[PFARL][AMPEDONQX][ISOMNP]-[NGDAV][SPD][MFN]",
"V-[PIFARLX][AMPEDONQX][ISOMNP]-[123][SP]",
"V-2[FARL][AMPEDONQX][ISOMNP]-[123][SP]",
"V-2[PFARL][AMPEDONQX][ISOMNP]-[123][SP]",
"A-[NVGDA][SP][MFN]-NUI",
"S-[123][SP][NVGDA][SP][MFN]",
"[NARCDTKIXQFSP]",
"[NARCDTKIXQFSP]-[123][NVGDA][SP][MFN]",
"[NARCDTKIXQFSP]-[NVGDA][SP][MFN]",
"[NARCDTKIXQFSP]-[123][NVGDA][SP]",
"[NARCDTKIXQFSP]-[NVGDA][SP]",
"[NA]-[NVGDA][SP][MFN]-[PLT]",
"[NA]-[NVGDA][SP][MFN]-[PL]G",
"[NA]-[NVGDA][SP][MFN]-LI",
};

protected static List<String> computePatterns() {
List<String> result = new ArrayList<>();
for (String prefix : PATTERN_PREFIXES) {
result.add(prefix);
result.add(prefix + "-ATT");
result.add(prefix + "-ARAM");
result.add(prefix + "-HEB");
if (!prefix.startsWith("V")) {
result.add(prefix + "-ABB");
result.add(prefix + "-S");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public static void main(String[] args) throws Exception {
bw.write("window.onload = function() {\n");
bw.write("\tlet errors = \"\", log = \"\", seen = {};\n\tfor (let rmac of choices) { \n\t\tlet x;\n");
bw.write("\t\ttry {\n\t\t\tx = renderRMAC(rmac);\n\t\t} catch (e) {\n\t\t\terrors +=\"<br>\"+rmac+\" results in error: \"+e;\n");
bw.write("\t\t\terrcount++;\n\t\t\tcontinue;\n\t\t}\n\t\tif (seen[x]) {\n\t\t\terrors += \"<br>Both \"+seen[x]+\" and \"+rmac+\" render to the same value: \"+x;\n");
bw.write("\t\t\tcontinue;\n\t\t}\n\t\tif (seen[x]) {\n\t\t\terrors += \"<br>Both \"+seen[x]+\" and \"+rmac+\" render to the same value: \"+x;\n");
bw.write("\t\t} else if (x == rmac || x.indexOf(\"undefined\") != -1) {\n\t\t\terrors += \"<br>\"+rmac + \" renders as \" + x;\n\t\t} else {\n");
bw.write("\t\t\tlog +=\"<br>\"+rmac+\" renders as \" + x;\n\t\t}\n\t\tseen[x] = rmac;\n\t}\n");
bw.write("\tdocument.getElementById(\"errors\").innerHTML = errors; document.getElementById(\"log\").innerHTML = log;\n};\n");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ public class Utils {
public static final String VERSE_REGEX = "[1-9][0-9,/.-]*[a-zG]?";

private static final String RMAC_UNDECLINED = "ADV|CONJ|COND|PRT|PREP|INJ|ARAM|HEB|N-PRI|A-NUI|N-LI|N-OI";
private static final String RMAC_DECLINED = "[NARCDTKIXQFSP](-[123]?[NVGDA][SP][MFN]?)?";
private static final String RMAC_VERBS = "V-([PIFARLX]|2[FARL])[AMPEDONQX][ISOMNP](-([123][SP]|[NGDAV][SPD][MFN]))?";
public static final String RMAC_REGEX = "(" + RMAC_UNDECLINED + "|" + RMAC_DECLINED + ")(-(S|C|ABB|I|N|K|ATT))?|" + RMAC_VERBS + "(-ATT)?";
private static final String RMAC_DECLINED = "[NARCDTKIXQFSP](-[123]?[NVGDA][SP][MFN]?)?|[NA]-[NVGDA][SP][MFN]-([PLT]|[PL]G|LI)|A-[NVGDA][SP][MFN]-NUI|S-[123][SP][NVGDA][SP][MFN]";
private static final String RMAC_VERBS = "V-([PIFARLX]|2[PFARL])[AMPEDONQX][ISOMNP](-([123][SP]|[NGDAV][SPD][MFN]))?";
public static final String RMAC_REGEX = "(" + RMAC_UNDECLINED + "|" + RMAC_DECLINED + ")(-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?|" + RMAC_VERBS + "(-ATT|-ARAM|-HEB)?";

public static int validateNumber(String name, int value, int min, int max) {
if (value < min || value > max)
Expand Down
47 changes: 34 additions & 13 deletions biblemulticonverter/src/main/resources/RoundtripHTML/script.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,12 @@ function renderRMAC(rmac) {
I : "Interrogative",
N : "Negative",
K : "Kai",
ARAM : "Aramaic",
HEB : "Hebrew",
ATT : "Attic Greek form"
};
if (rmac.substring(0, 2) == "V-") {
var parts = rmac.match("V-([PIFARLX]|2[FARL])([AMPEDONQX][ISOMNP])(-([123][SP]|[NGDAV][SPD][MFN]))?(-ATT)?");
var parts = rmac.match("V-([PIFARLX]|2[PFARL])([AMPEDONQX][ISOMNP])(-([123][SP]|[NGDAV][SPD][MFN]))?(-ATT|-ARAM|-HEB)?");
if (parts != null && parts.length == 6) {
var extra = "";
if (parts[4]) {
Expand Down Expand Up @@ -113,6 +115,7 @@ function renderRMAC(rmac) {
R : "Perfect",
L : "Pluperfect",
X : "No tense stated",
"2P" : "Second Present",
"2F" : "Second Future",
"2A" : "Second Aorist",
"2R" : "Second peRfect",
Expand All @@ -136,37 +139,55 @@ function renderRMAC(rmac) {
P : "Participle"
})[parts[2].substring(1, 2)] + extra + ")";
}
} else if (rmac.search("^[NARCDTKIXQFSP](-[123]?[NVGDA][SP][MFN]?)?(-(S|C|ABB|I|N|K|ATT))?$") != -1) {
var parts = rmac.match("^([NARCDTKIXQFSP])(-([123]?)([NVGDA][SP][MFN]?))?(-(S|C|ABB|I|N|K|ATT))?$");
if (parts.length == 7) {
} else if (rmac.search("^[NARCDTKIXQFSP](-[123]?[SP]?[NVGDA][SP][MFN]?)?(-([PLT]|[PL]G|LI|NUI))?(-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?$") != -1) {
var parts = rmac.match("^([NARCDTKIXQFSP])(-([123]?)([SP]?)([NVGDA][SP][MFN]?))?(-([PLT]|[PL]G|LI|NUI))?(-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?$");
if (parts.length == 10) {
var extra = "";
if (parts[4] || parts[6]) {
if (parts[5] || parts[7] || parts[9]) {
extra += " (";
if (parts[3]) {
extra += "Person=" + parts[3] + ", ";
}
if (parts[4]) {
extra += "Number=" + ({
S : "Singular",
D : "Dual",
P : "Plural"
})[parts[4]] + ", ";
}
if (parts[5]) {
extra += "Case=" + ({
N : "Nominative",
G : "Genitive",
D : "Dative",
A : "Accusative",
V : "Vocative"
})[parts[4].substring(0, 1)] + ", Number=" + ({
})[parts[5].substring(0, 1)] + ", Number=" + ({
S : "Singular",
D : "Dual",
P : "Plural"
})[parts[4].substring(1, 2)];
if (parts[4].length == 3) {
})[parts[5].substring(1, 2)];
if (parts[5].length == 3) {
extra += ", Gender=" + ({
M : "Masculine",
F : "Feminine",
N : "Neuter"
})[parts[4].substring(2, 3)];
})[parts[5].substring(2, 3)];
}
}
if (parts[6]) {
extra += (parts[4] ? ", " : "") + suffixes[parts[6]];
if (parts[7]) {
extra += (parts[5] ? ", " : "") + ({
P : "Person",
L : "Location",
T : "Title",
PG : "Person Gentilic",
LG : "Location Gentilic",
LI : "Letter Indeclinable",
NUI : "Numerical Indiclinable",
})[parts[7]];
}
if (parts[9]) {
extra += (parts[5] || parts[7] ? ", " : "") + suffixes[parts[9]];
}
extra += ")";
}
Expand All @@ -186,8 +207,8 @@ function renderRMAC(rmac) {
P : "Personal pronoun"
})[parts[1]] + extra;
}
} else if (rmac.search("^(ADV|CONJ|COND|PRT|PREP|INJ|ARAM|HEB|N-PRI|A-NUI|N-LI|N-OI)(-(S|C|ABB|I|N|K|ATT))?$") != -1) {
var parts = rmac.match("^(ADV|CONJ|COND|PRT|PREP|INJ|ARAM|HEB|N-PRI|A-NUI|N-LI|N-OI)(-(S|C|ABB|I|N|K|ATT))?$");
} else if (rmac.search("^(ADV|CONJ|COND|PRT|PREP|INJ|ARAM|HEB|N-PRI|A-NUI|N-LI|N-OI)(-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?$") != -1) {
var parts = rmac.match("^(ADV|CONJ|COND|PRT|PREP|INJ|ARAM|HEB|N-PRI|A-NUI|N-LI|N-OI)(-(S|C|ABB|I|N|K|ATT|ARAM|HEB))?$");
if (parts.length == 4) {
return ({
ADV : "Adverb or adverb and particle combined",
Expand Down

0 comments on commit fec4d4f

Please sign in to comment.