ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1713674 - in /ctakes/sandbox/ctakes-coref-cleartk/scripts: ./ analyze.py
Date Tue, 10 Nov 2015 15:54:43 GMT
Author: tmill
Date: Tue Nov 10 15:54:42 2015
New Revision: 1713674

URL: http://svn.apache.org/viewvc?rev=1713674&view=rev
Log:
Added script to repo to analyze and visualize outputs.

Added:
    ctakes/sandbox/ctakes-coref-cleartk/scripts/
    ctakes/sandbox/ctakes-coref-cleartk/scripts/analyze.py

Added: ctakes/sandbox/ctakes-coref-cleartk/scripts/analyze.py
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/scripts/analyze.py?rev=1713674&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/scripts/analyze.py (added)
+++ ctakes/sandbox/ctakes-coref-cleartk/scripts/analyze.py Tue Nov 10 15:54:42 2015
@@ -0,0 +1,95 @@
+#!/usr/bin/python
+"""Analyze and visualize coreference errors of system outputs with cort.
+
+Usage: analyze.py <gold output> <system output> [<system output> ...]
+"""
+
+import os
+import sys
+
+from cort.core import corpora
+from cort.analysis import error_extractors
+from cort.analysis import spanning_tree_algorithms
+from cort.analysis import plotting  # used only by the disabled plot
+
+
+def main(args):
+    """Load the corpora, extract coreference errors and visualize them.
+
+    args -- list of file paths: the gold output first, followed by one or
+            more system outputs.
+
+    Writes a message to stderr and exits with status -1 when fewer than
+    two paths are given, or when a system output would be stored under a
+    name that is already in use.
+    """
+    if len(args) < 2:
+        sys.stderr.write("Requires two arguments <gold output> <system output>*\n")
+        sys.exit(-1)
+
+    outputs = dict()
+    # System names in command-line order, so the systems are processed in
+    # a predictable order rather than in dict order.
+    system_names = []
+
+    for ind, arg in enumerate(args):
+        if ind == 0:
+            name = "gold"
+        else:
+            # File name without directory and extension.  Unlike slicing
+            # between the last '/' and the last '.', this also copes with
+            # paths that have no extension or a dot only in a directory.
+            name = os.path.splitext(os.path.basename(arg))[0]
+            if name in outputs:
+                # Would silently replace the gold corpus or another system.
+                sys.stderr.write(
+                    "Duplicate corpus name '%s' (from %s)\n" % (name, arg))
+                sys.exit(-1)
+            system_names.append(name)
+
+        print("Loading %s" % name)
+        # Close the file once the corpus is built.  NOTE(review): assumes
+        # from_file() reads the whole file before returning -- confirm.
+        with open(arg) as corpus_file:
+            outputs[name] = corpora.Corpus.from_file(name, corpus_file)
+
+    print("Extracting errors")
+    extractor = error_extractors.ErrorExtractor(
+        outputs["gold"],
+        spanning_tree_algorithms.recall_accessibility,
+        spanning_tree_algorithms.precision_system_output
+    )
+
+    for name in system_names:
+        extractor.add_system(outputs[name])
+
+    print("Getting errors...")
+    errors = extractor.get_errors()
+
+    print("Sorting errors by type...")
+    errors_by_type = errors.categorize(
+        lambda error: error[0].attributes['type']
+    )
+    print("Calling visualize")
+
+    # Only the systems are visualized: "gold" is the reference and was never
+    # added via add_system().  NOTE(review): presumably it has no entry in
+    # errors_by_type -- confirm against cort.
+    errs = dict()
+    for name in system_names:
+        errors_by_type.visualize(name)
+        errs[name] = errors_by_type[name]["recall_errors"]["all"]
+
+#    pair_errs = errors_by_type["pair"]["recall_errors"]["all"]
+#    tree_errs = errors_by_type["tree"]["recall_errors"]["all"]
+
+#    plotting.plot(
+#    [("pair", [(cat, len(errs)) for cat, errs in pair_errs.items()]),
+#     ("tree", [(cat, len(errs)) for cat, errs in tree_errs.items()])],
+#    "Recall Errors",
+#    "Type of anaphor",
+#    "Number of Errors")
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])



Mime
View raw message