Skip to content

Commit

Permalink
Add lemmas to a few of the MWTs that we combine for English. A few others are still TODO, such as the n't, 'll, etc. suite.
Browse files Browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Mar 13, 2024
1 parent 1dd746c commit 850e588
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions src/edu/stanford/nlp/trees/ud/EnglishMWTCombiner.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ public SemanticGraph combineMWTs(SemanticGraph sg) {

// combine using the CombineMWT operation, using the default concatenation for the MWT text
String mwt = String.join(newline,
// TODO: separate the contractions so we can adjust the lemmas?
// In some other way fix those lemmas?
"<ssurgeon-pattern-list>",
" <ssurgeon-pattern>",
" <uid>1</uid>",
Expand All @@ -30,9 +32,19 @@ public SemanticGraph combineMWTs(SemanticGraph sg) {
" </ssurgeon-pattern>",
" <ssurgeon-pattern>",
" <uid>3</uid>",
" <notes>Edit a node's MWT for wanna/gonna</notes>",
" <semgrex>" + XMLUtils.escapeXML("{word:/(?i)wan|gon/;after://}=first . {word:/(?i)na/}=second") + "</semgrex>",
" <notes>Edit a node's MWT for wanna</notes>",
" <semgrex>" + XMLUtils.escapeXML("{word:/(?i)wan/;after://}=first . {word:/(?i)na/}=second") + "</semgrex>",
" <edit-list>CombineMWT -node first -node second</edit-list>",
" <edit-list>EditNode -node first -lemma want</edit-list>",
" <edit-list>EditNode -node second -lemma to</edit-list>",
" </ssurgeon-pattern>",
" <ssurgeon-pattern>",
" <uid>3b</uid>",
" <notes>Edit a node's MWT for gonna</notes>",
" <semgrex>" + XMLUtils.escapeXML("{word:/(?i)gon/;after://}=first . {word:/(?i)na/}=second") + "</semgrex>",
" <edit-list>CombineMWT -node first -node second</edit-list>",
" <edit-list>EditNode -node first -lemma go</edit-list>",
" <edit-list>EditNode -node second -lemma to</edit-list>",
" </ssurgeon-pattern>",
" <ssurgeon-pattern>",
" <uid>4</uid>",
Expand All @@ -45,6 +57,8 @@ public SemanticGraph combineMWTs(SemanticGraph sg) {
" <notes>Edit a node's MWT for 'tis and 'twas</notes>",
" <semgrex>" + XMLUtils.escapeXML("{word:/'[tT]/}=first . {word:/(?i)is|was/}=second") + "</semgrex>",
" <edit-list>CombineMWT -node first -node second</edit-list>",
" <edit-list>EditNode -node first -lemma it</edit-list>",
" <edit-list>EditNode -node second -lemma be</edit-list>",
" </ssurgeon-pattern>",
" <ssurgeon-pattern>",
" <uid>6</uid>",
Expand Down

0 comments on commit 850e588

Please sign in to comment.