Skip to content

Commit

Permalink
Also flatten combined RB or ADVP phrases
Browse files Browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Apr 29, 2024
1 parent 8b2b668 commit 6245acc
Showing 1 changed file with 15 additions and 1 deletion.
16 changes: 15 additions & 1 deletion src/edu/stanford/nlp/trees/QPTreeTransformer.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,25 @@ public Tree transformTree(Tree t) {
// Tsurgeon operation applied together with splitMoneyTregex in QPtransform:
// builds a new QP subtree spanning the nodes named "left" through "right".
// NOTE(review): "left" and "right" are presumably bound by splitMoneyTregex,
// which is not visible here -- confirm against that pattern.
private static final TsurgeonPattern splitMoneyTsurgeon =
Tsurgeon.parseOperation("createSubtree QP left right");

// Groups two leading adverbs of a QP so they can be wrapped in one ADVP.
//
// This fixes a very rare subset of parses, such as
//   "(NP (QP just about all) the losses) ..."
// In fact, that is the only example in ptb3-revised.
//
// Because of earlier MWE combinations, a tree may already have been rewritten
//   "(NP (QP at least a) day)"
//   -> "(NP (QP (ADVP at least) a) day)"
// so the flattenAdvmod step that runs afterwards must also match that
// ADVP-headed shape.
//
// The pattern matches an NP with a QP child where:
//   - the QP's first and second children are RB (named "first" and "second"),
//   - the QP's third child is a DT with nothing immediately to its right
//     (i.e. the DT is the QP's last child),
//   - the QP has some later sister whose label starts with "N".
private static final TregexPattern groupADVPTregex =
TregexPattern.compile("NP < (QP <1 RB=first <2 RB=second <3 (DT !$+ __) $++ /^N/)");

// Tsurgeon operation paired with groupADVPTregex: wraps the nodes named
// "first" through "second" (the two leading RBs) in a new ADVP node, e.g.
//   (QP just about all) -> (QP (ADVP just about) all)
private static final TsurgeonPattern groupADVPTsurgeon =
Tsurgeon.parseOperation("createSubtree ADVP first second");

// Finds a QP that should be removed in a structure such as
//   (NP (QP nearly_RB all_DT) stuff_NN)
// so that the converter can attach both `nearly` and `all` directly to
// `stuff`.  (Annoyingly, this does not use a nummod either.)
//
// The QP (named "remove") must be a child of an NP, have an adverbial first
// child, have a DT as its second and last child, and be followed by a sister
// whose label starts with "N".
private static final TregexPattern flattenAdvmodTregex =
TregexPattern.compile("NP < (QP=remove <1 RB <2 (DT !$+ __) $++ /^N/)");
TregexPattern.compile("NP < (QP=remove <1 ADVP|RB <2 (DT !$+ __) $++ /^N/)");

// Tsurgeon operation paired with flattenAdvmodTregex: excises the QP node
// named "remove", leaving its children attached to the enclosing NP.
private static final TsurgeonPattern flattenAdvmodTsurgeon =
Tsurgeon.parseOperation("excise remove remove");
Expand All @@ -131,6 +144,7 @@ public Tree QPtransform(Tree t) {
}
t = Tsurgeon.processPattern(splitCCTregex, splitCCTsurgeon, t);
t = Tsurgeon.processPattern(splitMoneyTregex, splitMoneyTsurgeon, t);
t = Tsurgeon.processPattern(groupADVPTregex, groupADVPTsurgeon, t);
t = Tsurgeon.processPattern(flattenAdvmodTregex, flattenAdvmodTsurgeon, t);
return t;
}
Expand Down

0 comments on commit 6245acc

Please sign in to comment.