add support for initializing annotator with model files

Slash0BZ · Slash0BZ · commit 50817f344159 · 2017-10-21T02:47:34.000-05:00
spell corrections
diff --git a/md/src/main/java/org/cogcomp/md/AnnotatorTester.java b/md/src/main/java/org/cogcomp/md/AnnotatorTester.java
@@ -10,10 +10,7 @@
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
-import edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation;
 import edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader;
-import edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREDocumentReader;
-import edu.illinois.cs.cogcomp.nlp.corpusreaders.ereReader.EREMentionRelationReader;
 import edu.illinois.cs.cogcomp.pos.POSAnnotator;
 
 /**
@@ -22,7 +19,7 @@
  */
 public class AnnotatorTester {
     /**
-     * By default, this function uses the ERE model trained with Type on ERE corpus, should have a fairly high performance.
+     * By default, this function uses the ACE model trained with Type on the ACE corpus, which should perform fairly well.
      */
     public static void test_basic_annotator(){
         ACEReader aceReader = null;
@@ -71,7 +68,54 @@ public static void test_basic_annotator(){
         System.out.println("Type Correct: " + total_type_correct);
         System.out.println("Extent Correct: " + total_extent_correct);
     }
+    public static void test_custom_annotator(){ // Scores a MentionAnnotator built from explicit model paths against the MENTION_ACE view and prints the counts.
+        ACEReader aceReader = null;
+        POSAnnotator posAnnotator = new POSAnnotator();
+        int total_labeled = 0; // constituents found in the MENTION_ACE view
+        int total_predicted = 0; // constituents found in the MENTION view
+        int total_correct = 0; // predicted mentions whose head span equals some gold head span
+        int total_type_correct = 0; // head matches whose EntityType attribute also agrees
+        int total_extent_correct = 0; // head matches whose full start/end span also agrees
+        try {
+            aceReader = new ACEReader("data/partition_with_dev/dev", false);
+            MentionAnnotator mentionAnnotator = new MentionAnnotator("", "models/TAC_NOM", "", "", ""); // argument order is NAM, NOM, PRO, EXTENT, mode: only a NOM model path is supplied
+            for (TextAnnotation ta : aceReader) {
+                ta.addView(posAnnotator); // the POS view is added before the mention annotator runs
+                mentionAnnotator.addView(ta);
+                total_labeled += ta.getView(ViewNames.MENTION_ACE).getNumberOfConstituents(); // presumably the gold mentions supplied by ACEReader -- confirm
+                total_predicted += ta.getView(ViewNames.MENTION).getNumberOfConstituents();
+                for (Constituent pc : ta.getView(ViewNames.MENTION).getConstituents()){ // pc: predicted mention
+                    for (Constituent gc : ta.getView(ViewNames.MENTION_ACE).getConstituents()){ // gc: mention from the MENTION_ACE view
+                        gc.addAttribute("EntityType", gc.getLabel()); // expose the label under the same attribute key that is read from pc below
+                        Constituent gch = ACEReader.getEntityHeadForConstituent(gc, ta, "B");
+                        if (gch == null){ // no head available for this mention: it cannot be matched
+                            continue;
+                        }
+                        if (Integer.parseInt(pc.getAttribute("EntityHeadStartSpan")) == gch.getStartSpan() &&
+                                Integer.parseInt(pc.getAttribute("EntityHeadEndSpan")) == gch.getEndSpan()){ // head spans are identical
+                            total_correct ++;
+                            if (pc.getAttribute("EntityType").equals(gc.getAttribute("EntityType"))){
+                                total_type_correct ++;
+                            }
+                            if (pc.getStartSpan() == gc.getStartSpan() && pc.getEndSpan() == gc.getEndSpan()){
+                                total_extent_correct ++;
+                            }
+                            break; // a predicted mention is counted against at most one MENTION_ACE mention
+                        }
+                    }
+                }
+            }
+        }
+        catch (Exception e){ // any failure stops the loop; the totals gathered so far are still printed below
+            e.printStackTrace();
+        }
+        System.out.println("Labeled: " + total_labeled);
+        System.out.println("Predicted: " + total_predicted);
+        System.out.println("Correct: " + total_correct);
+        System.out.println("Type Correct: " + total_type_correct);
+        System.out.println("Extent Correct: " + total_extent_correct);
+    }
     public static void main(String[] args){
-        test_basic_annotator();
+        test_custom_annotator(); // run the evaluation that builds the annotator from explicit model paths
     }
 }
diff --git a/md/src/main/java/org/cogcomp/md/BIOTester.java b/md/src/main/java/org/cogcomp/md/BIOTester.java
@@ -7,7 +7,6 @@
  */
 package org.cogcomp.md;
 
-import org.cogcomp.md.LbjGen.*;
 import edu.illinois.cs.cogcomp.core.datastructures.Pair;
 import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
 import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
@@ -21,9 +20,16 @@
 import edu.illinois.cs.cogcomp.lbjava.learn.Lexicon;
 import edu.illinois.cs.cogcomp.lbjava.parse.Parser;
 import edu.illinois.cs.cogcomp.nlp.corpusreaders.ACEReader;
+import org.cogcomp.md.LbjGen.bio_classifier_nam;
+import org.cogcomp.md.LbjGen.bio_classifier_nom;
+import org.cogcomp.md.LbjGen.bio_classifier_pro;
+import org.cogcomp.md.LbjGen.bio_label;
 
 import java.lang.reflect.Method;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
 /**
@@ -257,6 +263,9 @@ public static Pair<String, Integer> joint_inference(Constituent t, Learner[] can
         int chosen = -1;
 
         for (int i = 0; i < candidates.length; i++){
+            if (candidates[i] == null){
+                continue;
+            }
             String prediction = candidates[i].discreteValue(t);
             preBIOLevel1[i] = prediction;
             if (prediction.startsWith("B") || prediction.startsWith("U")){
diff --git a/md/src/main/java/org/cogcomp/md/MentionAnnotator.java b/md/src/main/java/org/cogcomp/md/MentionAnnotator.java
@@ -7,9 +7,6 @@
  */
 package org.cogcomp.md;
 
-import edu.illinois.cs.cogcomp.pos.POSAnnotator;
-import org.cogcomp.md.LbjGen.*;
-
 import edu.illinois.cs.cogcomp.annotation.Annotator;
 import edu.illinois.cs.cogcomp.annotation.AnnotatorException;
 import edu.illinois.cs.cogcomp.core.datastructures.Pair;
@@ -26,6 +23,10 @@
 import edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.FlatGazetteers;
 import edu.illinois.cs.cogcomp.ner.ExpressiveFeatures.GazetteersFactory;
 import org.cogcomp.Datastore;
+import org.cogcomp.md.LbjGen.bio_classifier_nam;
+import org.cogcomp.md.LbjGen.bio_classifier_nom;
+import org.cogcomp.md.LbjGen.bio_classifier_pro;
+import org.cogcomp.md.LbjGen.extent_classifier;
 
 import java.io.File;
 import java.util.Vector;
@@ -40,9 +41,13 @@
  */
 public class MentionAnnotator extends Annotator{
 
-    private bio_classifier_nam classifier_nam;
-    private bio_classifier_nom classifier_nom;
-    private bio_classifier_pro classifier_pro;
+    private bio_classifier_nam classifier_nam = null;
+    private bio_classifier_nom classifier_nom = null;
+    private bio_classifier_pro classifier_pro = null;
+    String fileName_NAM = "";
+    String fileName_NOM = "";
+    String fileName_PRO = "";
+    String fileName_EXTENT = "";
     private extent_classifier classifier_extent;
     private Learner[] candidates;
     private FlatGazetteers gazetteers;
@@ -74,16 +79,34 @@ public MentionAnnotator(boolean lazilyInitialize, String mode){
         _mode = mode;
     }
 
+    /**
+     * Creates an annotator whose classifiers are loaded from explicit model files in initialize().
+     * A null or empty path means that model is not loaded from a user-supplied file.
+     *
+     * @param nam_model_path NAM model file path (excluding the extension)
+     * @param nom_model_path NOM model file path (excluding the extension)
+     * @param pro_model_path PRO model file path (excluding the extension)
+     * @param extent_model_path EXTENT model file path (excluding the extension)
+     * @param mode stored in _mode; use "" to keep the paths above, since initialize() may replace
+     *             them with datastore models when the mode contains "ACE" or "ERE"
+     */
+    public MentionAnnotator(String nam_model_path, String nom_model_path, String pro_model_path, String extent_model_path, String mode){
+        super(ViewNames.MENTION, new String[]{ViewNames.POS}, true);
+        _mode = mode;
+        // Test the arguments rather than the fields (which are never null here), and map null
+        // to "" because initialize() decides what to load via fileName_X.equals("").
+        fileName_NAM = nam_model_path == null ? "" : nam_model_path;
+        fileName_NOM = nom_model_path == null ? "" : nom_model_path;
+        fileName_PRO = pro_model_path == null ? "" : pro_model_path;
+        fileName_EXTENT = extent_model_path == null ? "" : extent_model_path;
+    }
+
     public void initialize(ResourceManager rm){
-        String fileName_NAM = "";
-        String fileName_NOM = "";
-        String fileName_PRO = "";
-        String fileName_EXTENT = "";
         try {
             Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
             if (_mode.contains("ACE")) {
-                File extentFlie = ds.getDirectory("org.cogcomp.mention", "ACE_EXTENT", 1.0, false);
-                fileName_EXTENT = extentFlie.getPath() + File.separator + "ACE_EXTENT" + File.separator + "EXTENT_ACE";
+                File extentFile = ds.getDirectory("org.cogcomp.mention", "ACE_EXTENT", 1.0, false);
+                fileName_EXTENT = extentFile.getPath() + File.separator + "ACE_EXTENT" + File.separator + "EXTENT_ACE";
                 if (_mode.contains("NON")){
                     File headFile = ds.getDirectory("org.cogcomp.mention", "ACE_HEAD_NONTYPE", 1.0, false);
                     fileName_NAM = headFile.getPath() + File.separator + "ACE_HEAD_NONTYPE" + File.separator + "ACE_NAM";
@@ -98,8 +121,8 @@ public void initialize(ResourceManager rm){
                 }
             }
             else if (_mode.contains("ERE")){
-                File extentFlie = ds.getDirectory("org.cogcomp.mention", "ERE_EXTENT", 1.0, false);
-                fileName_EXTENT = extentFlie.getPath() + File.separator + "ERE_EXTENT" + File.separator + "EXTENT_ERE";
+                File extentFile = ds.getDirectory("org.cogcomp.mention", "ERE_EXTENT", 1.0, false);
+                fileName_EXTENT = extentFile.getPath() + File.separator + "ERE_EXTENT" + File.separator + "EXTENT_ERE";
                 if (_mode.contains("NON")){
                     File headFile = ds.getDirectory("org.cogcomp.mention", "ERE_HEAD_NONTYPE", 1.0, false);
                     fileName_NAM = headFile.getPath() + File.separator + "ERE_HEAD_NONTYPE" + File.separator + "ERE_NAM";
@@ -117,11 +140,29 @@ else if (_mode.contains("ERE")){
         catch (Exception e){
             e.printStackTrace();
         }
-
-        classifier_nam = new bio_classifier_nam(fileName_NAM + ".lc", fileName_NAM + ".lex");
-        classifier_nom = new bio_classifier_nom(fileName_NOM+".lc", fileName_NOM + ".lex");
-        classifier_pro = new bio_classifier_pro(fileName_PRO + ".lc", fileName_PRO + ".lex");
-        classifier_extent = new extent_classifier(fileName_EXTENT + ".lc", fileName_EXTENT + ".lex");
+        if (!fileName_NAM.equals("")) {
+            classifier_nam = new bio_classifier_nam(fileName_NAM + ".lc", fileName_NAM + ".lex");
+        }
+        if (!fileName_NOM.equals("")) {
+            classifier_nom = new bio_classifier_nom(fileName_NOM + ".lc", fileName_NOM + ".lex");
+        }
+        if (!fileName_PRO.equals("")) {
+            classifier_pro = new bio_classifier_pro(fileName_PRO + ".lc", fileName_PRO + ".lex");
+        }
+        if (!fileName_EXTENT.equals("")) {
+            classifier_extent = new extent_classifier(fileName_EXTENT + ".lc", fileName_EXTENT + ".lex");
+        }
+        else {
+            try {
+                Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
+                File extentFile = ds.getDirectory("org.cogcomp.mention", "ACE_EXTENT", 1.0, false);
+                fileName_EXTENT = extentFile.getPath() + File.separator + "ACE_EXTENT" + File.separator + "EXTENT_ACE";
+                classifier_extent = new extent_classifier(fileName_EXTENT + ".lc", fileName_EXTENT + ".lex");
+            }
+            catch (Exception e){
+                e.printStackTrace();
+            }
+        }
 
         try {
             Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());