Skip to content

Commit

Permalink
Update the UniversalPOSMapper to use AUX for a large chunk of the dep…
Browse files Browse the repository at this point in the history
…endencies by reusing the patterns from UniversalEnglishGrammaticalRelations to find those words. Currently it is finding more than it should, but the error rate is significantly lower than it is without this change
  • Loading branch information
AngledLuffa committed Mar 14, 2024
1 parent 7f70ad8 commit 30f2f8e
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 27 deletions.
4 changes: 4 additions & 0 deletions src/edu/stanford/nlp/trees/GrammaticalRelation.java
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,10 @@ private void addChild(GrammaticalRelation child) {
children.add(child);
}

/**
 * Exposes the tregex patterns stored in {@code targetPatterns} without
 * letting callers change them.
 *
 * @return a read-only view of the {@code targetPatterns} list; any attempt
 *         to modify the returned list throws
 *         {@link UnsupportedOperationException}
 */
public List<TregexPattern> targetPatterns() {
  // Wrap rather than copy: the result is a view over the internal list.
  final List<TregexPattern> readOnlyView = Collections.unmodifiableList(targetPatterns);
  return readOnlyView;
}

/** Given a {@code Tree} node {@code t}, attempts to
* return a list of nodes to which node {@code t} has this
* grammatical relation, with {@code t} as the governor.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,21 @@ private UniversalEnglishGrammaticalRelations() {}
* <br>
* Example: <br>
* "Reagan has died" &rarr; {@code aux}(died, has)
* <br>
* For any pattern in AUX_MODIFIER, AUX_PASSIVE_MODIFIER, and COPULA
* where the target is not the verb itself, but rather the enclosing
* constituent, the verb inside that constituent is marked with the
* tregex named node {@code =aux}.
* Please make sure to maintain this: UniversalPOSMapper reuses these
* patterns and relies on the {@code =aux} name to relabel the verb's tag.
*/
public static final GrammaticalRelation AUX_MODIFIER =
new GrammaticalRelation(Language.UniversalEnglish, "aux", "auxiliary",
DEPENDENT, "VP|SQ|SINV|CONJP", tregexCompiler,
"VP < VP < (/^(?:MD|VB.*|AUXG?|POS)$/=target)",
"SQ|SINV < (/^(?:VB|MD|AUX)/=target $++ /^(?:VP|ADJP)/)",
// add handling of tricky VP fronting cases...
"SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBG))");
"SINV < (VP=target < (/^(?:VB|AUX|POS)/=aux < " + beAuxiliaryRegex + ") $-- (VP < VBG))");


/**
Expand All @@ -156,15 +163,17 @@ private UniversalEnglishGrammaticalRelations() {}
*
* Example: <br>
* "Kennedy has been killed" &rarr; {@code aux:pass}(killed, been)
* <br>
* See AUX_MODIFIER for an explanation of the =aux named nodes
*/
public static final GrammaticalRelation AUX_PASSIVE_MODIFIER =
new GrammaticalRelation(Language.UniversalEnglish, "aux:pass", "passive auxiliary",
AUX_MODIFIER, "VP|SQ|SINV", tregexCompiler,
"VP < (/^(?:VB|AUX|POS)/=target < " + passiveAuxWordRegex + " ) < (VP|ADJP [ < VBN|VBD | < (VP|ADJP < VBN|VBD) < CC ] )",
"SQ|SINV < (/^(?:VB|AUX|POS)/=target < " + beAuxiliaryRegex + " $++ (VP < VBD|VBN))",
// add handling of tricky VP fronting cases...
"SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBD|VBN))",
"SINV < (VP=target < (VP < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ")) $-- (VP < VBD|VBN))");
"SINV < (VP=target < (/^(?:VB|AUX|POS)/=aux < " + beAuxiliaryRegex + ") $-- (VP < VBD|VBN))",
"SINV < (VP=target < (VP < (/^(?:VB|AUX|POS)/=aux < " + beAuxiliaryRegex + ")) $-- (VP < VBD|VBN))");

/**
* The "copula" grammatical relation. A copula is the relation between
Expand All @@ -173,6 +182,8 @@ private UniversalEnglishGrammaticalRelations() {}
* Examples: <br>
* "Bill is big" &rarr; {@code cop}(big, is) <br>
* "Bill is an honest man" &rarr; {@code cop}(man, is)
* <br>
* See AUX_MODIFIER for an explanation of the =aux named nodes
*/
public static final GrammaticalRelation COPULA =
new GrammaticalRelation(Language.UniversalEnglish, "cop", "copula",
Expand All @@ -182,7 +193,7 @@ private UniversalEnglishGrammaticalRelations() {}
// matches (what, is) in "what is that" after the SQ has been flattened out of the tree
"SBARQ < (/^(?:VB|AUX)/=target < " + copularWordRegex + ") < (WHNP < WP)",
// "Such a great idea this was"
"SINV <# (NP $++ (NP $++ (VP=target < (/^(?:VB|AUX)/ < " + copularWordRegex + "))))");
"SINV <# (NP $++ (NP $++ (VP=target < (/^(?:VB|AUX)/=aux < " + copularWordRegex + "))))");

/**
* The "conjunct" grammatical relation. A conjunct is the relation between
Expand Down
46 changes: 23 additions & 23 deletions src/edu/stanford/nlp/trees/UniversalPOSMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,30 +70,30 @@ public static void load() {

}

List<TregexPattern> auxPatterns = new ArrayList<>();
auxPatterns.addAll(UniversalEnglishGrammaticalRelations.AUX_MODIFIER.targetPatterns());
auxPatterns.addAll(UniversalEnglishGrammaticalRelations.AUX_PASSIVE_MODIFIER.targetPatterns());
auxPatterns.addAll(UniversalEnglishGrammaticalRelations.COPULA.targetPatterns());
for (TregexPattern pattern : auxPatterns) {
// Note that the original patterns capture both VB and AUX.
// We restrict the relabeled node to VB here: if we also captured AUX,
// relabeling AUX -> AUX would keep matching forever (infinite loop).
// We also don't relabel POS, since that would be a really weird UPOS/XPOS combination.
final String newTregex;
final String newTsurgeon;
if (pattern.knownVariables().contains("aux")) {
newTregex = pattern.pattern() + ": (=aux == /^(?:VB)/)";
newTsurgeon = "relabel aux AUX";
} else {
newTregex = pattern.pattern() + ": (=target == /^(?:VB)/)";
newTsurgeon = "relabel target AUX";
}
operations.add(new Pair<>(TregexPattern.compile(newTregex),
Tsurgeon.parseOperation(newTsurgeon)));
}

String [][] otherContextMappings = new String [][] {
// Don't do this, we are now treating these as copular constructions
// VB.* -> AUX (for passives where main verb is part of an ADJP)
// @VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (@ADJP [ < VBN|VBD | < (@VP|ADJP < VBN|VBD) < CC ] )
//relabel target AUX",

// VB.* -> AUX (for cases with fronted main VPs)
{ "@SINV < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ ) $-- (@VP < VBD|VBN))",
"AUX", },
// VB.* -> AUX (another, rarer case of fronted VPs)
{ "@SINV < (@VP < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ )) $-- (@VP < VBD|VBN))",
"AUX", },

// VB.* -> AUX (passive, case 2)
//"%SQ|SINV < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ $++ (VP < VBD|VBN))",
//"%relabel target AUX",
// VB.* -> AUX (active, case 1)
{ "VP < VP < (/^VB.*$/=target <: /^(?i:will|have|can|would|do|is|was|be|are|has|could|should|did|been|may|were|had|'ll|'ve|does|am|might|ca|'m|being|'s|must|'d|'re|wo|shall|get|ve|s|got|r|m|getting|having|d|re|ll|wilt|v|of|my|nt|gets|du|wud|woud|with|willl|wil|wase|shoul|shal|`s|ould|-ll|most|made|hvae|hav|cold|as|art|ai|ar|a)$/)",
"AUX", },

// VB -> AUX (active, case 2)
{ "@SQ|SINV < (/^VB/=target $++ /^(?:VP)/ <... {/.*/})", "AUX" },

// otherwise, VB.* -> VERB
// this will capture all verbs not found by the AUX_MODIFIER, AUX_PASSIVE_MODIFIER, and COPULA expressions above
// VB.* -> VERB
{ "/^VB.*/=target <... {/.*/}", "VERB", },

// IN -> SCONJ (subordinating conjunctions)
Expand Down

0 comments on commit 30f2f8e

Please sign in to comment.