Skip to content

Commit 50817f3

Browse files
committed
add support for initializing annotator with model files
spell corrections
1 parent 0a44d3c commit 50817f3

File tree

3 files changed

+120
-26
lines changed

3 files changed

+120
-26
lines changed

md/src/main/java/org/cogcomp/md/AnnotatorTester.java

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@
1010
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
1111
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
1212
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
13-
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation;
1413
import edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader;
15-
import edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREDocumentReader;
16-
import edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader;
1714
import edu.illinois.cs.cogcomp.pos.POSAnnotator;
1815

1916
/**
@@ -22,7 +19,7 @@
2219
*/
2320
public class AnnotatorTester {
2421
/**
25-
* By default, this function uses the ERE model trained with Type on ERE corpus, should have a fairly high performance.
22+
* By default, this function uses the ACE model trained with Type on the ACE corpus, which should have fairly high performance.
2623
*/
2724
public static void test_basic_annotator(){
2825
ACEReader aceReader = null;
@@ -71,7 +68,54 @@ public static void test_basic_annotator(){
7168
System.out.println("Type Correct: " + total_type_correct);
7269
System.out.println("Extent Correct: " + total_extent_correct);
7370
}
71+
public static void test_custom_annotator(){
72+
ACEReader aceReader = null;
73+
POSAnnotator posAnnotator = new POSAnnotator();
74+
int total_labeled = 0;
75+
int total_predicted = 0;
76+
int total_correct = 0;
77+
int total_type_correct = 0;
78+
int total_extent_correct = 0;
79+
try {
80+
aceReader = new ACEReader("data/partition_with_dev/dev", false);
81+
MentionAnnotator mentionAnnotator = new MentionAnnotator("", "models/TAC_NOM", "", "", "");
82+
for (TextAnnotation ta : aceReader) {
83+
ta.addView(posAnnotator);
84+
mentionAnnotator.addView(ta);
85+
total_labeled += ta.getView(ViewNames.MENTION_ACE).getNumberOfConstituents();
86+
total_predicted += ta.getView(ViewNames.MENTION).getNumberOfConstituents();
87+
for (Constituent pc : ta.getView(ViewNames.MENTION).getConstituents()){
88+
for (Constituent gc : ta.getView(ViewNames.MENTION_ACE).getConstituents()){
89+
gc.addAttribute("EntityType", gc.getLabel());
90+
Constituent gch = ACEReader.getEntityHeadForConstituent(gc, ta, "B");
91+
if (gch == null){
92+
continue;
93+
}
94+
if (Integer.parseInt(pc.getAttribute("EntityHeadStartSpan")) == gch.getStartSpan() &&
95+
Integer.parseInt(pc.getAttribute("EntityHeadEndSpan")) == gch.getEndSpan()){
96+
total_correct ++;
97+
if (pc.getAttribute("EntityType").equals(gc.getAttribute("EntityType"))){
98+
total_type_correct ++;
99+
}
100+
if (pc.getStartSpan() == gc.getStartSpan() && pc.getEndSpan() == gc.getEndSpan()){
101+
total_extent_correct ++;
102+
}
103+
break;
104+
}
105+
}
106+
}
107+
}
108+
}
109+
catch (Exception e){
110+
e.printStackTrace();
111+
}
112+
System.out.println("Labeled: " + total_labeled);
113+
System.out.println("Predicted: " + total_predicted);
114+
System.out.println("Correct: " + total_correct);
115+
System.out.println("Type Correct: " + total_type_correct);
116+
System.out.println("Extent Correct: " + total_extent_correct);
117+
}
74118
// Entry point of the tester. In this diff the call to test_basic_annotator() is the
// removed line and the call to test_custom_annotator() is the added line, so after
// the commit main runs only the custom-model evaluation.
public static void main(String[] args){
75-
test_basic_annotator();
119+
test_custom_annotator();
76120
}
77121
}

md/src/main/java/org/cogcomp/md/BIOTester.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
*/
88
package org.cogcomp.md;
99

10-
import org.cogcomp.md.LbjGen.*;
1110
import edu.illinois.cs.cogcomp.core.datastructures.Pair;
1211
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
1312
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
@@ -21,9 +20,16 @@
2120
import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
2221
import edu.illinois.cs.cogcomp.lbjava.parse.Parser;
2322
import edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader;
23+
import org.cogcomp.md.LbjGen.bio_classifier_nam;
24+
import org.cogcomp.md.LbjGen.bio_classifier_nom;
25+
import org.cogcomp.md.LbjGen.bio_classifier_pro;
26+
import org.cogcomp.md.LbjGen.bio_label;
2427

2528
import java.lang.reflect.Method;
26-
import java.util.*;
29+
import java.util.ArrayList;
30+
import java.util.HashMap;
31+
import java.util.List;
32+
import java.util.Map;
2733
import java.util.concurrent.ConcurrentHashMap;
2834

2935
/**
@@ -257,6 +263,9 @@ public static Pair<String, Integer> joint_inference(Constituent t, Learner[] can
257263
int chosen = -1;
258264

259265
for (int i = 0; i < candidates.length; i++){
266+
if (candidates[i] == null){
267+
continue;
268+
}
260269
String prediction = candidates[i].discreteValue(t);
261270
preBIOLevel1[i] = prediction;
262271
if (prediction.startsWith("B") || prediction.startsWith("U")){

md/src/main/java/org/cogcomp/md/MentionAnnotator.java

Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
*/
88
package org.cogcomp.md;
99

10-
import edu.illinois.cs.cogcomp.pos.POSAnnotator;
11-
import org.cogcomp.md.LbjGen.*;
12-
1310
import edu.illinois.cs.cogcomp.annotation.Annotator;
1411
import edu.illinois.cs.cogcomp.annotation.AnnotatorException;
1512
import edu.illinois.cs.cogcomp.core.datastructures.Pair;
@@ -26,6 +23,10 @@
2623
import edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.FlatGazetteers;
2724
import edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.GazetteersFactory;
2825
import org.cogcomp.Datastore;
26+
import org.cogcomp.md.LbjGen.bio_classifier_nam;
27+
import org.cogcomp.md.LbjGen.bio_classifier_nom;
28+
import org.cogcomp.md.LbjGen.bio_classifier_pro;
29+
import org.cogcomp.md.LbjGen.extent_classifier;
2930

3031
import java.io.File;
3132
import java.util.Vector;
@@ -40,9 +41,13 @@
4041
*/
4142
public class MentionAnnotator extends Annotator{
4243

43-
private bio_classifier_nam classifier_nam;
44-
private bio_classifier_nom classifier_nom;
45-
private bio_classifier_pro classifier_pro;
44+
private bio_classifier_nam classifier_nam = null;
45+
private bio_classifier_nom classifier_nom = null;
46+
private bio_classifier_pro classifier_pro = null;
47+
String fileName_NAM = "";
48+
String fileName_NOM = "";
49+
String fileName_PRO = "";
50+
String fileName_EXTENT = "";
4651
private extent_classifier classifier_extent;
4752
private Learner[] candidates;
4853
private FlatGazetteers gazetteers;
@@ -74,16 +79,34 @@ public MentionAnnotator(boolean lazilyInitialize, String mode){
7479
_mode = mode;
7580
}
7681

82+
/**
 * Creates an annotator whose classifiers are loaded from caller-supplied model files.
 *
 * Each path is given without its extension; initialize() appends ".lc" and ".lex" when it
 * loads the model. An empty or null path leaves the corresponding file name empty. For
 * NAM/NOM/PRO that means initialize() does not construct the classifier; for EXTENT it
 * means initialize() falls back to the ACE extent model fetched from the datastore.
 *
 * @param nam_model_path NAM model file path (excluding the extension); may be empty or null
 * @param nom_model_path NOM model file path (excluding the extension); may be empty or null
 * @param pro_model_path PRO model file path (excluding the extension); may be empty or null
 * @param extent_model_path EXTENT model file path (excluding the extension); may be empty or null
 * @param mode stored as the annotator mode. initialize() checks whether it contains "ACE"
 *             or "ERE" and, if so, assigns datastore model paths over the ones given here,
 *             so pass an empty string to keep the custom paths.
 */
public MentionAnnotator(String nam_model_path, String nom_model_path, String pro_model_path, String extent_model_path, String mode){
    super(ViewNames.MENTION, new String[]{ViewNames.POS}, true);
    _mode = mode;
    // Guard on the arguments, not the fields: the fields start as "" and must never become
    // null, because initialize() calls fileName_X.equals("") on each of them.
    if (nam_model_path != null) {
        fileName_NAM = nam_model_path;
    }
    if (nom_model_path != null) {
        fileName_NOM = nom_model_path;
    }
    if (pro_model_path != null) {
        fileName_PRO = pro_model_path;
    }
    // Previously this argument was dropped, so a custom extent model could not be used.
    if (extent_model_path != null) {
        fileName_EXTENT = extent_model_path;
    }
}
103+
77104
public void initialize(ResourceManager rm){
78-
String fileName_NAM = "";
79-
String fileName_NOM = "";
80-
String fileName_PRO = "";
81-
String fileName_EXTENT = "";
82105
try {
83106
Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
84107
if (_mode.contains("ACE")) {
85-
File extentFlie = ds.getDirectory("org.cogcomp.mention", "ACE_EXTENT", 1.0, false);
86-
fileName_EXTENT = extentFlie.getPath() + File.separator + "ACE_EXTENT" + File.separator + "EXTENT_ACE";
108+
File extentFile = ds.getDirectory("org.cogcomp.mention", "ACE_EXTENT", 1.0, false);
109+
fileName_EXTENT = extentFile.getPath() + File.separator + "ACE_EXTENT" + File.separator + "EXTENT_ACE";
87110
if (_mode.contains("NON")){
88111
File headFile = ds.getDirectory("org.cogcomp.mention", "ACE_HEAD_NONTYPE", 1.0, false);
89112
fileName_NAM = headFile.getPath() + File.separator + "ACE_HEAD_NONTYPE" + File.separator + "ACE_NAM";
@@ -98,8 +121,8 @@ public void initialize(ResourceManager rm){
98121
}
99122
}
100123
else if (_mode.contains("ERE")){
101-
File extentFlie = ds.getDirectory("org.cogcomp.mention", "ERE_EXTENT", 1.0, false);
102-
fileName_EXTENT = extentFlie.getPath() + File.separator + "ERE_EXTENT" + File.separator + "EXTENT_ERE";
124+
File extentFile = ds.getDirectory("org.cogcomp.mention", "ERE_EXTENT", 1.0, false);
125+
fileName_EXTENT = extentFile.getPath() + File.separator + "ERE_EXTENT" + File.separator + "EXTENT_ERE";
103126
if (_mode.contains("NON")){
104127
File headFile = ds.getDirectory("org.cogcomp.mention", "ERE_HEAD_NONTYPE", 1.0, false);
105128
fileName_NAM = headFile.getPath() + File.separator + "ERE_HEAD_NONTYPE" + File.separator + "ERE_NAM";
@@ -117,11 +140,29 @@ else if (_mode.contains("ERE")){
117140
catch (Exception e){
118141
e.printStackTrace();
119142
}
120-
121-
classifier_nam = new bio_classifier_nam(fileName_NAM + ".lc", fileName_NAM + ".lex");
122-
classifier_nom = new bio_classifier_nom(fileName_NOM+".lc", fileName_NOM + ".lex");
123-
classifier_pro = new bio_classifier_pro(fileName_PRO + ".lc", fileName_PRO + ".lex");
124-
classifier_extent = new extent_classifier(fileName_EXTENT + ".lc", fileName_EXTENT + ".lex");
143+
if (!fileName_NAM.equals("")) {
144+
classifier_nam = new bio_classifier_nam(fileName_NAM + ".lc", fileName_NAM + ".lex");
145+
}
146+
if (!fileName_NOM.equals("")) {
147+
classifier_nom = new bio_classifier_nom(fileName_NOM + ".lc", fileName_NOM + ".lex");
148+
}
149+
if (!fileName_PRO.equals("")) {
150+
classifier_pro = new bio_classifier_pro(fileName_PRO + ".lc", fileName_PRO + ".lex");
151+
}
152+
if (!fileName_EXTENT.equals("")) {
153+
classifier_extent = new extent_classifier(fileName_EXTENT + ".lc", fileName_EXTENT + ".lex");
154+
}
155+
else {
156+
try {
157+
Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
158+
File extentFile = ds.getDirectory("org.cogcomp.mention", "ACE_EXTENT", 1.0, false);
159+
fileName_EXTENT = extentFile.getPath() + File.separator + "ACE_EXTENT" + File.separator + "EXTENT_ACE";
160+
classifier_extent = new extent_classifier(fileName_EXTENT + ".lc", fileName_EXTENT + ".lex");
161+
}
162+
catch (Exception e){
163+
e.printStackTrace();
164+
}
165+
}
125166

126167
try {
127168
Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());

0 commit comments

Comments
 (0)