ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1712083 [3/3] - in /ctakes/sandbox/ctakes-clinical-deid: ./ GATE/ GATE/pipeline/ GATE/plugins/ GATE/plugins/ANNIE/ GATE/plugins/ANNIE/.annie-defaults-metadata/ GATE/plugins/ANNIE/resources/ GATE/plugins/ANNIE/resources/gazetteer/ GATE/plug...
Date Mon, 02 Nov 2015 16:59:52 GMT
Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/other/PreProcess.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/other/PreProcess.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/other/PreProcess.class
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Extract.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Extract.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Extract.class
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Input.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Input.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Input.class
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Overlap.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Overlap.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/Overlap.class
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/PassTwo.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/PassTwo.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/pipeline/secondpass/PassTwo.class
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/ui/Deid.class
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/ui/Deid.class?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/bin/co/dehghan/cdeid/ui/Deid.class
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/ant-1.9.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/ant-1.9.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/ant-1.9.3.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/ant-launcher-1.9.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/ant-launcher-1.9.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/ant-launcher-1.9.3.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/aopalliance-1.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/aopalliance-1.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/aopalliance-1.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-core-0.7.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-core-0.7.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-core-0.7.2.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-dom-0.7.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-dom-0.7.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/apache-mime4j-dom-0.7.2.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/bcmail-jdk15-1.45.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/bcmail-jdk15-1.45.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/bcmail-jdk15-1.45.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/bcprov-jdk15-1.45.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/bcprov-jdk15-1.45.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/bcprov-jdk15-1.45.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-codec-1.9.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-codec-1.9.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-codec-1.9.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-compress-1.8.1.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-compress-1.8.1.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-compress-1.8.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-io-2.4.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-io-2.4.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-io-2.4.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-lang-2.6.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-lang-2.6.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-lang-2.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/commons-logging-1.1.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/commons-logging-1.1.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/commons-logging-1.1.3.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/flying-saucer-core-9.0.4.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/flying-saucer-core-9.0.4.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/flying-saucer-core-9.0.4.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/fontbox-1.8.8.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/fontbox-1.8.8.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/fontbox-1.8.8.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/gate-asm-5.0.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gate-asm-5.0.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gate-asm-5.0.3.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/gate-compiler-jdt-4.3.2-P20140317-1600.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gate-compiler-jdt-4.3.2-P20140317-1600.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gate-compiler-jdt-4.3.2-P20140317-1600.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/gate.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gate.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gate.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/gateLauncher.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/gateLauncher.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/gateLauncher.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/hamcrest-core-1.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/hamcrest-core-1.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/hamcrest-core-1.3.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/ivy-2.3.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/ivy-2.3.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/ivy-2.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-annotations-2.3.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jackson-annotations-2.3.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-annotations-2.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-core-2.3.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jackson-core-2.3.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-core-2.3.2.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-databind-2.3.2.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jackson-databind-2.3.2.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jackson-databind-2.3.2.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/java-getopt-1.0.13.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/java-getopt-1.0.13.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/java-getopt-1.0.13.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/jaxen-1.1.6.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jaxen-1.1.6.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jaxen-1.1.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/jdom-1.1.3.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jdom-1.1.3.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jdom-1.1.3.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/jempbox-1.8.8.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/jempbox-1.8.8.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/jempbox-1.8.8.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/joda-time-2.6.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/joda-time-2.6.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/joda-time-2.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/junit-4.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/junit-4.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/junit-4.11.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/log4j-1.2.17.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/log4j-1.2.17.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/log4j-1.2.17.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/nekohtml-1.9.14.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/nekohtml-1.9.14.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/nekohtml-1.9.14.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/pdfbox-app-1.8.10.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/pdfbox-app-1.8.10.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/pdfbox-app-1.8.10.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-3.11.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-3.11.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-schemas-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-schemas-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-ooxml-schemas-3.11.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/poi-scratchpad-3.11.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/poi-scratchpad-3.11.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/poi-scratchpad-3.11.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/spring-aop-2.5.6.SEC01.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/spring-aop-2.5.6.SEC01.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/spring-aop-2.5.6.SEC01.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/spring-beans-2.5.6.SEC01.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/spring-beans-2.5.6.SEC01.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/spring-beans-2.5.6.SEC01.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/spring-core-2.5.6.SEC01.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/spring-core-2.5.6.SEC01.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/spring-core-2.5.6.SEC01.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/stax2-api-3.1.1.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/stax2-api-3.1.1.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/stax2-api-3.1.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/tika-core-1.7.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/tika-core-1.7.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/tika-core-1.7.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/tika-parsers-1.7.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/tika-parsers-1.7.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/tika-parsers-1.7.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/validationtools.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/validationtools.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/validationtools.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/woodstox-core-lgpl-4.2.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/woodstox-core-lgpl-4.2.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/woodstox-core-lgpl-4.2.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/xercesImpl-2.9.1.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xercesImpl-2.9.1.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xercesImpl-2.9.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/xmlbeans-2.6.0.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xmlbeans-2.6.0.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xmlbeans-2.6.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/xmlunit-1.5.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xmlunit-1.5.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xmlunit-1.5.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/xpp3-1.1.4c.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xpp3-1.1.4c.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xpp3-1.1.4c.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/libs/xstream-1.4.7.jar
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/libs/xstream-1.4.7.jar?rev=1712083&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/sandbox/ctakes-clinical-deid/libs/xstream-1.4.7.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/controller/Controller.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,287 @@
+package co.dehghan.cdeid.controller;
+
+import gate.creole.ResourceInstantiationException;
+//import validationtools.Evaluation;
+//import validationtools.GenerateGateDocument;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.TrueFileFilter;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.util.PDFTextStripper;
+
+import co.dehghan.cdeid.io.Output;
+import co.dehghan.cdeid.pipeline.firstpass.PassOne;
+import co.dehghan.cdeid.pipeline.firstpass.ner.EmailNER;
+import co.dehghan.cdeid.pipeline.firstpass.ner.UrlNER;
+import co.dehghan.cdeid.pipeline.secondpass.Input;
+import co.dehghan.cdeid.pipeline.secondpass.PassTwo;
+import co.dehghan.cdeid.pipeline.other.PostProcess;
+import co.dehghan.cdeid.pipeline.other.PreProcess;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Controller.
+ * 
+ * Import lib/validationtools.jar if you intend to validate or continue to develop cDeid; see TESTING commented code.
+ * Obtain the data from http://www.i2b2.org/NLP.
+ */
+public class Controller {
+	
+	/** Creates a controller; the constructor body is empty, all work happens in run(...). */
+	public Controller(){}
+	
+	/*
+	 * TESTING
+	 
+	public static void main(String[] args) throws ResourceInstantiationException, IOException{
+		Controller c = new Controller();
+		c.run(new File("i2b2-2014/TrackI/testing-PHI-Gold-fixed/"), new File("/home/dehghana/Data/i2b2-2014/op/"), "gatexml");
+	}
+	*/
+	
+	/**
+	 * Runs the complete workflow over every file found below the input directory:
+	 * 1. Pre-processing
+	 * 2. PassOne
+	 * 3. PassTwo
+	 * 4. Post-processing
+	 * and finally writes one output file per document that could be loaded.
+	 * 
+	 * @param input_dir input directory; sub-directories are included
+	 * @param output_dir output directory
+	 * @param outputFormat "gatexml" selects GATE XML, any other value selects the cDeid XML format
+	 * @throws ResourceInstantiationException
+	 * @throws IOException 
+	 */
+	public void run(File input_dir, File output_dir, String outputFormat) throws ResourceInstantiationException, IOException{
+		
+		PreProcess preProcessor = new PreProcess();
+
+		File corpusRoot = new File(input_dir.getAbsolutePath());
+		Collection<File> inputFiles = FileUtils.listFiles(corpusRoot, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE);
+		
+		//loaded and pre-processed documents, kept in memory for the later passes
+		ArrayList<gate.Document> gateDocList = new ArrayList<gate.Document>();
+						
+		/*
+		 * TESTING: 
+		  ArrayList<gate.Document> gateList = GenerateGateDocument.getGateDocument(input_dir.getAbsolutePath()); 
+		*/
+		
+		System.out.println("Processing document(s):");
+		for(File inputFile: inputFiles){
+			String fileName = inputFile.getName();
+			System.out.print("\r"+fileName);
+		
+			/* TESTING
+			  for(gate.Document gd: gateList){ 
+				gateDoc = gd;
+			*/
+			
+			//null means the file could not be read or has an unsupported extension
+			gate.Document gateDoc = Document.getGateDocument(inputFile);
+			if(gateDoc != null){
+				gateDoc.setName(fileName); //set name; necessary for PassTwo
+				preProcessor.preProcessingPipeline(gateDoc); //i.e., Tokeniser and Sentence splitter
+				gateDocList.add(gateDoc);
+			}
+		}
+		System.out.print("\r");
+		
+		//pass one, pass two and post-processing each work on the whole list
+		runFirstPassPipeline(gateDocList);
+		runSecondPassPipeline(gateDocList);
+		runPostProcessingPipeline(gateDocList);
+		
+		//output
+		saveNotes(output_dir, gateDocList, outputFormat);
+		
+		//evaluation (TESTING):
+		//runEvaluation(gateDocList);
+		
+	}
+	
+	/**
+	 * Applies the first pass to each document in turn: the PassOne pipeline, then the URL
+	 * recogniser, then the e-mail recogniser.
+	 * Prerequisite: PreProcess.java processed gate.Documents.
+	 * 
+	 * @param gateDocList list of gate.Documents
+	 */
+	private static void runFirstPassPipeline(ArrayList<gate.Document> gateDocList){
+		PassOne firstPass = new PassOne();
+		for(int idx = 0; idx < gateDocList.size(); idx++){
+			gate.Document doc = gateDocList.get(idx);
+			firstPass.firstPassPipeline(doc);
+			UrlNER.run(doc);
+			EmailNER.run(doc);
+		}
+	}
+	
+	/**
+	 * Builds the second pass configuration and executes PassTwo on all documents.
+	 * Prerequisite: PassOne.java processed gate.Documents.
+	 * (OBS!) Hard-coded entity types: Patient, Doctor, Idnum, Medicalrecord and Zip.
+	 * 
+	 * @param gateDocList list of gate.Documents
+	 */
+	private static void runSecondPassPipeline(ArrayList<gate.Document> gateDocList){
+		//word filters handed to PassTwo for the two NAME types; the other types use an empty filter
+		String patientFilter = "(?i)take|po|i|fair|his|her|the|l|trace|seen|all|please|visit|pulses|ptt|physician|arthritis|depression|short|long|sick";
+		String doctorFilter = "(?i)take|po|i|fair|md";
+		
+		//each Input is (category, type, filter)
+		List<Input> secondPassInputs = new ArrayList<Input>();
+		secondPassInputs.add(new Input("NAME", "PATIENT", patientFilter));
+		secondPassInputs.add(new Input("NAME", "DOCTOR", doctorFilter));
+		secondPassInputs.add(new Input("ID", "IDNUM", ""));
+		secondPassInputs.add(new Input("ID", "MEDICALRECORD", ""));
+		secondPassInputs.add(new Input("LOCATION", "ZIP", ""));
+
+		new PassTwo(gateDocList, secondPassInputs).secondPassPipeline();
+	}
+	
+	/**
+	 * Sends every document through the post-processing pipeline, one at a time.
+	 * Prerequisite: PassOne.java and PassTwo.java processed gate.Documents.
+	 * 
+	 * @param gateDocList list of gate.Documents
+	 */
+	private static void runPostProcessingPipeline(ArrayList<gate.Document> gateDocList){
+		PostProcess postProcessor = new PostProcess();
+		for(int idx = 0; idx < gateDocList.size(); idx++){
+			postProcessor.postProcessingPipeline(gateDocList.get(idx));
+		}
+	}
+
+	/**
+	 * Writes one UTF-8 encoded file per document into the output directory. The file name is the
+	 * document name with its last three characters replaced by "xml".
+	 * 
+	 * @param output_dir output directory
+	 * @param gateDocList list of gate.Documents
+	 * @param outputFormat "gatexml" writes GATE XML; any other value writes the XML built from the
+	 *                     final_predictions annotation set
+	 * @throws IOException
+	 */
+	private static void saveNotes(File output_dir, ArrayList<gate.Document> gateDocList, String outputFormat) throws IOException{
+		for(gate.Document gateDoc: gateDocList){
+			String docName = gateDoc.getName();
+			String baseName = docName.substring(0, docName.length()-3); //keeps the trailing dot of the extension
+			File target = new File(output_dir.getAbsolutePath()+"/"+baseName+"xml");
+			
+			String content;
+			if(outputFormat.equals("gatexml"))
+				content = Output.getGateXml(gateDoc);
+			else
+				content = Output.getXml(gateDoc, "final_predictions");
+			
+			FileUtils.writeStringToFile(target, content, "UTF-8");
+		}
+	}
+	
+	/**
+	 * Run evaluation.
+	 * 
+	 * @param gateDocList
+	 
+	private static void runEvaluation(ArrayList<gate.Document> gateDocList){
+		Evaluation e1 = new Evaluation(), 
+			 e2 = new Evaluation(),
+			 e3 = new Evaluation(),
+			 e4 = new Evaluation(),
+			 e5 = new Evaluation(),
+			 e6 = new Evaluation(),
+			 e7 = new Evaluation(),
+			 e8 = new Evaluation(),
+			 e9 = new Evaluation(),
+			 e10 = new Evaluation(),
+			 e11= new Evaluation(),
+			 e12 = new Evaluation(),
+			 e13 = new Evaluation(),
+			 e14 = new Evaluation();
+
+		for(gate.Document g: gateDocList){
+			e1.procDoc(g, "PATIENT", "gold_set", "PATIENT", "final_predictions", false);
+			e2.procDoc(g, "DOCTOR", "gold_set", "DOCTOR", "final_predictions", false);
+			e3.procDoc(g, "IDNUM", "gold_set", "IDNUM", "final_predictions", false);
+			e4.procDoc(g, "MEDICALRECORD", "gold_set", "MEDICALRECORD", "final_predictions", false);
+			e5.procDoc(g, "AGE", "gold_set", "AGE", "final_predictions", false);
+			e6.procDoc(g, "ZIP", "gold_set", "ZIP", "final_predictions", false);
+			e7.procDoc(g, "PHONE", "gold_set", "PHONE", "final_predictions", false);
+			e8.procDoc(g, "FAX", "gold_set", "FAX", "final_predictions", false);
+			e9.procDoc(g, "EMAIL", "gold_set", "EMAIL", "final_predictions", false);
+			e10.procDoc(g, "URL", "gold_set", "URL", "final_predictions", false);
+			e11.procDoc(g, "STREET", "gold_set", "STREET", "final_predictions", false);
+			e12.procDoc(g, "COUNTRY", "gold_set", "COUNTRY", "final_predictions", false);
+			e13.procDoc(g, "STATE", "gold_set", "STATE", "final_predictions", false);
+			e14.procDoc(g, "USERNAME", "gold_set", "USERNAME", "final_predictions", false);
+
+		}
+		System.out.println("PATIENT:");
+		e1.printEval();
+		System.out.println("DOCTOR:");
+		e2.printEval();
+		System.out.println("IDNUM:");
+		e3.printEval();
+		System.out.println("MEDICALRECORD:");
+		e4.printEval();
+		System.out.println("AGE:");
+		e5.printEval();
+		System.out.println("ZIP:");
+		e6.printEval();
+		System.out.println("PHONE:");
+		e7.printEval();
+		System.out.println("FAX:");
+		e8.printEval();
+		System.out.println("EMAIL:");
+		e9.printEval();
+		System.out.println("URL:");
+		e10.printEval();
+		System.out.println("STREET:");
+		e11.printEval();
+		System.out.println("COUNTRY:");
+		e12.printEval();
+		System.out.println("STATE:");
+		e13.printEval();
+		System.out.println("USERNAME:");
+		e14.printEval();
+	}
+	*/
+}
+
+class Document{
+	/**
+	 * Read given file format: plain text (.txt, read as UTF-8) or searchable PDF (.pdf).
+	 * 
+	 * Failures are reported on System.err and are not rethrown; the caller has to
+	 * check the result for null.
+	 * 
+	 * @param f file
+	 * @return gate.Document, or null when the extension is not supported or the file
+	 *         could not be read or converted
+	 * 
+	 * TODO: expand input formats ...
+	 */
+	public static gate.Document getGateDocument(File f) {
+		
+		gate.Document gateDoc = null;
+		try {
+			if(f.getName().endsWith(".txt")){
+				gateDoc = gate.Factory.newDocument(FileUtils.readFileToString(f, "UTF-8"));
+			}else if(f.getName().endsWith(".pdf")){
+				PDDocument d = PDDocument.load(f);
+				try{
+					//the stripper is only needed for PDF input, so it is created here
+					gateDoc = gate.Factory.newDocument(new PDFTextStripper().getText(d));
+				}finally{
+					//release the loaded PDF even when text extraction fails
+					d.close();
+				}
+			}else{
+				System.err.println("cDeid can only process plain text (.txt) and searchable portable document format (.pdf) documents.\n"
+								+  "Amend the filename extension accordingly.");
+				System.err.println("Document err: " + f.getName());
+			}
+		} catch (IOException e) {
+			e.printStackTrace();
+		} catch (ResourceInstantiationException e) {
+			e.printStackTrace();
+		} 
+		
+		return gateDoc;
+	}
+	
+}
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/io/Output.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,62 @@
+package co.dehghan.cdeid.io;
+
+import gate.Annotation;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Generate Output.
+ */
+public class Output {
+
+	/**
+	 * cDeid XML: the document text in a CDATA section followed by one empty element per annotation.
+	 * 
+	 * Each element is named after the annotation's CATEGORY feature and carries the attributes
+	 * id, start, end, text and TYPE. Annotations are written label by label and, within a label,
+	 * in the order given by gate.Utils.OFFSET_COMPARATOR. Markup characters in the annotation text
+	 * and "]]>" in the document text are escaped so that the result stays well-formed.
+	 * 
+	 * @param gateDoc gate.Document
+	 * @param annotationSet annotation set where all annotations are stored
+	 * @return XML in String format
+	 */
+	public static String getXml(gate.Document gateDoc, String annotationSet)
+	{
+		String[] labels = { "PATIENT", "DOCTOR", "USERNAME",
+						"ZIP", "STATE", "STREET", "COUNTRY", 
+						"PHONE", "FAX", "URL", "EMAIL",
+						"AGE", "MEDICALRECORD", "IDNUM"};
+
+		//StringBuilder avoids re-copying the whole output for every annotation
+		StringBuilder xml = new StringBuilder();
+		xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
+		xml.append("<cDeid>\n");
+		xml.append("<TEXT><![CDATA[").append(escapeCdata(gateDoc.getContent().toString())).append("]]></TEXT>\n");
+		xml.append("<TAGS>\n");
+		
+		int i=1; //running id over all labels: P1, P2, ...
+		for(String label: labels)
+		{
+			List<Annotation> ann = new ArrayList<Annotation>(gateDoc.getAnnotations(annotationSet).get(label));		
+			Collections.sort(ann, gate.Utils.OFFSET_COMPARATOR);
+			
+			for(Annotation a: ann)
+			{	
+				xml.append("<").append(a.getFeatures().get("CATEGORY").toString());
+				xml.append(" id=\"P").append(i).append("\" ");
+				xml.append("start=\"").append(a.getStartNode().getOffset()).append("\" ");
+				xml.append("end=\"").append(a.getEndNode().getOffset()).append("\" ");
+				xml.append("text=\"").append(escapeAttribute(gate.Utils.stringFor(gateDoc, a))).append("\" ");
+				xml.append("TYPE=\"").append(a.getType().toString()).append("\" />\n");
+				i++;
+			}
+		}		
+		xml.append("</TAGS>\n");
+		xml.append("</cDeid >");
+		
+		return xml.toString();
+	}
+	
+	/**
+	 * Escape the characters that are not allowed verbatim inside a double-quoted XML attribute.
+	 * 
+	 * @param value raw attribute value
+	 * @return value with &amp;, &lt;, &gt; and the double quote replaced by entity references
+	 */
+	private static String escapeAttribute(String value){
+		//'&' must be replaced first, otherwise the entities produced below would be escaped again
+		return value.replace("&", "&amp;")
+					.replace("<", "&lt;")
+					.replace(">", "&gt;")
+					.replace("\"", "&quot;");
+	}
+	
+	/**
+	 * Make text safe for a CDATA section: a literal "]]>" would close the section, so it is split
+	 * across two adjacent CDATA sections. A parser reads back the original text.
+	 * 
+	 * @param text raw document text
+	 * @return text that can be placed between "<![CDATA[" and "]]>"
+	 */
+	private static String escapeCdata(String text){
+		return text.replace("]]>", "]]]]><![CDATA[>");
+	}
+	
+	/**
+	 * Get GATE XML.
+	 * 
+	 * @param gateDoc gate.Document
+	 * @return gate.Document XML
+	 */
+	public static String getGateXml(gate.Document gateDoc){
+		return gateDoc.toXml();
+	}
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/PassOne.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,61 @@
+package co.dehghan.cdeid.pipeline.firstpass;
+
+import java.io.File;
+import java.io.IOException;
+
+import gate.Corpus;
+import gate.CorpusController;
+import gate.Factory;
+import gate.util.GateException;
+import gate.util.persistence.PersistenceManager;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Saves annotations to annotation set: passOne
+ *
+ * See GATE/pipeline/passOne.xgapp
+ */
+public class PassOne {
+	
+	//shared by all instances; (re)assigned every time a PassOne is constructed
+	private static CorpusController passOne;
+	private static Corpus corpus;
+	
+	/**
+	 * Loads the first pass application from GATE/pipeline/passOne.xgapp and attaches a new
+	 * corpus named "c1" to it. Loading errors are printed and not rethrown.
+	 */
+	public PassOne(){
+		File application = new File("GATE/pipeline/passOne.xgapp");
+		try {
+			passOne = (CorpusController) PersistenceManager.loadObjectFromFile(application);
+			corpus = Factory.newCorpus("c1");
+			passOne.setCorpus(corpus);
+		} catch (GateException e) {
+			e.printStackTrace();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+	
+	/**
+	 * FirstPass pipeline: runs the loaded application on a single document.
+	 * A GateException raised during execution is reported on System.err and not rethrown.
+	 * 
+	 * @param gateDoc gate.Document
+	 */
+	public void firstPassPipeline(gate.Document gateDoc)
+	{		
+		try{
+			//the corpus holds exactly this one document while the application executes
+			corpus.add(gateDoc); 
+			passOne.execute();
+		} catch (GateException e) {
+			System.err.println("Pipeline.firstPassPipeline(...): " + e.getMessage() );
+		}
+		//empty the corpus again and let the controller clean up
+		corpus.clear();
+		passOne.cleanup();
+	}
+}
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/EmailNER.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,46 @@
+package co.dehghan.cdeid.pipeline.firstpass.ner;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import gate.util.InvalidOffsetException;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Saves annotations to annotation set: passOne
+ * 
+ * Email NER for GATE documents.
+ */
+public class EmailNER {
+
+	//NOTE(review): the second character class contains a space ("[_A-Za-z0-9- ]"), so text such as
+	//"a. b c@host.com" is matched as one address - confirm whether that is intended.
+	private static final String EMAIL_PATTERN = 
+			"[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9- ]+)*@"
+		  + "[A-Za-z0-9-]+(\\.[A-Za-z0-9]+)*(\\.(com|org|edu|gov|mil|co\\.uk))"; //expand
+	
+	//compiled once instead of on every call to run(...)
+	private static final Pattern EMAIL = Pattern.compile(EMAIL_PATTERN, Pattern.CASE_INSENSITIVE);
+		
+	/**
+	 * Annotate every e-mail address found in the document text.
+	 * 
+	 * @param gateDoc gate.Document
+	 */
+	public static void run(gate.Document gateDoc)
+	{
+		Matcher m = EMAIL.matcher(gateDoc.getContent().toString());
+
+		while(m.find())
+		{
+			addAnnotation(gateDoc, m.start(), m.end());
+		}
+	}
+	
+	/**
+	 * Create an EMAIL annotation in annotation set passOne.
+	 * 
+	 * @param gateDoc gate.Document
+	 * @param start offset
+	 * @param end offset
+	 */
+	private static void addAnnotation(gate.Document gateDoc, int start, int end)
+	{
+		gate.FeatureMap gateMap = gate.Factory.newFeatureMap();
+		gateMap.put("CATEGORY", "CONTACT");
+	
+		try {
+			//annotation set: passOne, category: CONTACT, label: EMAIL
+			gateDoc.getAnnotations("passOne").add((long)start, (long)end, "EMAIL", gateMap);
+		} catch (InvalidOffsetException e) {
+			//offsets come straight from the matcher on the document text; report and carry on
+			e.printStackTrace();
+		}
+	}
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/firstpass/ner/UrlNER.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,45 @@
+package co.dehghan.cdeid.pipeline.firstpass.ner;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import gate.util.InvalidOffsetException;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Save annotations to annotation set: passOne
+ * 
+ * URL NER for GATE documents.
+ */
+public class UrlNER {
+	//NOTE(review): the dots in this expression are not escaped and therefore match any character;
+	//the expression relies on that to accept hosts with a varying number of labels - confirm
+	//before tightening it.
+	private static final String URL_PATTERN = 
+			"(https?://)?(www.)([a-zA-Z0-9]+).[a-zA-Z0-9]*.[a-z]{3}.?([a-z]+)?(/[a-zA-Z0-9]+)?";
+	
+	//compiled once instead of on every call to run(...)
+	private static final Pattern URL = Pattern.compile(URL_PATTERN, Pattern.CASE_INSENSITIVE);
+		
+	/**
+	 * Annotate every URL found in the document text.
+	 * 
+	 * @param gateDoc gate.Document
+	 */
+	public static void run(gate.Document gateDoc)
+	{
+		Matcher m = URL.matcher(gateDoc.getContent().toString());
+
+		while(m.find())
+		{
+			addAnnotation(gateDoc, m.start(), m.end());
+		}
+	}
+	
+	/**
+	 * Create a URL annotation in annotation set passOne.
+	 * 
+	 * @param gateDoc gate.Document
+	 * @param start offset
+	 * @param end offset
+	 */
+	private static void addAnnotation(gate.Document gateDoc, int start, int end)
+	{
+		gate.FeatureMap gateMap = gate.Factory.newFeatureMap();
+		gateMap.put("CATEGORY", "CONTACT");
+	
+		try {
+			//annotation set: passOne, category: CONTACT, label: URL
+			gateDoc.getAnnotations("passOne").add((long)start, (long)end, "URL", gateMap);
+		} catch (InvalidOffsetException e) {
+			//offsets come straight from the matcher on the document text; report and carry on
+			e.printStackTrace();
+		}
+	}
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PostProcess.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,60 @@
+package co.dehghan.cdeid.pipeline.other;
+
+import java.io.File;
+import java.io.IOException;
+
+import gate.Corpus;
+import gate.CorpusController;
+import gate.Factory;
+import gate.util.GateException;
+import gate.util.persistence.PersistenceManager;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Saves annotations to annotation set: final_predictions
+ * 
+ * See GATE/pipeline/postProcessing.xgapp
+ */
+public class PostProcess {
+
+	//shared by all instances; (re)assigned every time a PostProcess is constructed
+	private static CorpusController postProc;
+	private static Corpus corpus;
+	
+	/**
+	 * Loads the post-processing application from GATE/pipeline/postProcessing.xgapp and attaches
+	 * a new corpus named "c1" to it. Loading errors are printed and not rethrown.
+	 */
+	public PostProcess(){
+		File application = new File("GATE/pipeline/postProcessing.xgapp");
+		try {
+			postProc = (CorpusController) PersistenceManager.loadObjectFromFile(application);
+			corpus = Factory.newCorpus("c1");
+			postProc.setCorpus(corpus);
+		} catch (GateException e) {
+			e.printStackTrace();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+	
+	/**
+	 * Post processing pipeline: runs the loaded application on a single document.
+	 * A GateException raised during execution is reported on System.err and not rethrown.
+	 * 
+	 * @param gateDoc gate.Document
+	 */
+	public void postProcessingPipeline(gate.Document gateDoc)
+	{		
+		try{
+			//the corpus holds exactly this one document while the application executes
+			corpus.add(gateDoc); 
+			postProc.execute();
+		} catch (GateException e) {
+			System.err.println("Pipeline.postProcessingPipeline(...): " + e.getMessage() );
+		}
+		//empty the corpus again and let the controller clean up
+		corpus.clear();
+		postProc.cleanup();
+	}
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/other/PreProcess.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,84 @@
+package co.dehghan.cdeid.pipeline.other;
+
+import gate.Corpus;
+import gate.CorpusController;
+import gate.Factory;
+import gate.Gate;
+import gate.creole.CreoleRegisterImpl;
+import gate.util.GateException;
+import gate.util.Out;
+import gate.util.persistence.PersistenceManager;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * See GATE/pipeline/preProcessing.xgapp
+ */
+public class PreProcess {
+	
+	//shared by all instances; (re)assigned every time a PreProcess is constructed
+	private static CorpusController preProc;
+	private static Corpus corpus;
+	
+	/**
+	 * Initialises GATE with "GATE/" as both home and plugins directory and loads the
+	 * pre-processing application from GATE/pipeline/preProcessing.xgapp.
+	 * A GateException terminates the JVM with exit code 1; an IOException is only printed.
+	 */
+	public PreProcess()
+	{		
+		//switch off log output of the CREOLE register
+		Logger.getLogger(CreoleRegisterImpl.class).setLevel(Level.OFF);
+		
+		Out.prln("\n cDeid (US) v0.1, Copyright (C) 2015  Azad Dehghan");
+		Out.prln("\n.Initialising pipeline ...");
+		
+		try {
+			//init GATE
+			File gateDir = new File("GATE/");
+			Gate.setPluginsHome(gateDir);
+			Gate.setGateHome(gateDir);
+			Gate.runInSandbox(true);
+			Gate.init();
+						
+			/*
+			 * Init pre-processing pipeline:
+			 * 1.Tokenizer
+			 * 2.Sentence splitter 
+			 */
+			File application = new File("GATE/pipeline/preProcessing.xgapp");
+			preProc = (CorpusController) PersistenceManager.loadObjectFromFile(application);
+			corpus = Factory.newCorpus("c1");
+			preProc.setCorpus(corpus);
+			
+		} catch (GateException e) {
+			System.err.println("Pipeline.initGate(): " + e.getMessage());
+			System.exit(1); 
+		} catch (IOException e) {
+			e.printStackTrace();
+		}	
+		Out.prln(".Initialisation completed ...");
+	}
+
+	/**
+	 * Pre-processing pipeline: runs the loaded application on a single document.
+	 * A GateException raised during execution is reported on System.err and not rethrown.
+	 * 
+	 * @param gateDoc gate.Document
+	 */
+	public void preProcessingPipeline(gate.Document gateDoc)
+	{		
+		try{
+			//the corpus holds exactly this one document while the application executes
+			corpus.add(gateDoc); 
+			preProc.execute();
+		} catch (GateException e) {
+			System.err.println("Pipeline.preProcessingPipeline(...): " + e.getMessage() );
+		}
+		//empty the corpus again and let the controller clean up
+		corpus.clear();
+		preProc.cleanup();
+	}
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/Input.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,49 @@
+package co.dehghan.cdeid.pipeline.secondpass;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Input for PassTwo.
+ */
+public class Input {
+
+	private String category;
+	private String type;
+	private String filter;
+	
+	/**
+	 * Bundles the three settings PassTwo needs for one entity type.
+	 * 
+	 * @param category CATEGORY
+	 * @param type TYPE
+	 * @param filter word filter; PassTwo tests dictionary entries against it with String.matches
+	 */
+	public Input(String category, String type, String filter){
+		this.category = category;
+		this.type = type;
+		this.filter = filter;
+	}
+	
+	/** @return CATEGORY */
+	public String getCategory(){
+		return this.category;
+	}
+	
+	/** @param category CATEGORY */
+	public void setCategory(String category){
+		this.category = category;
+	}
+	
+	/** @return TYPE */
+	public String getType(){
+		return this.type;
+	}
+	
+	/** @param type TYPE */
+	public void setType(String type){
+		this.type = type;
+	}
+	
+	/** @return word filter */
+	public String getFilter(){
+		return this.filter;
+	}
+	
+	/** @param filter word filter */
+	public void setFilter(String filter){
+		this.filter = filter;
+	}
+}
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/pipeline/secondpass/PassTwo.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,247 @@
+package co.dehghan.cdeid.pipeline.secondpass;
+
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.Factory;
+import gate.util.InvalidOffsetException;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import co.dehghan.cdeid.pipeline.secondpass.Overlap;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * Takes initial system annotations (first-pass) and compiles a run-time and patient-level dictionary and
+ * finally applies dictionary matching (second-pass) as final annotations.
+ * 
+ * Saves annotations to annotation set: passTwo
+ * 
+ * PassTwo.java v1.
+ */
+public class PassTwo {
+	
+	private static LinkedHashMap<String, String[]> dictionary; //patient-level dictionaries; k: patient-id, v: dictionary terms (plain text)
+	private static ArrayList<gate.Document> gateDocList;
+	private static List<Input> inputList;
+	
+	/**
+	 * @param gateDocList list of gate.Documents.
+	 * @param pass2InputList list of inputs for the second pass.
+	 */
+	public PassTwo(ArrayList<gate.Document> gateDocList, List<Input> pass2InputList){
+		PassTwo.gateDocList = gateDocList;
+		PassTwo.inputList = pass2InputList;
+	}
+	
+	/**
+	 * For each configured input: collect the pass-one annotations of that type from all documents,
+	 * turn them into patient-level dictionaries and apply dictionary matching to all documents.
+	 */
+	public void secondPassPipeline(){
+		for(Input in: inputList){
+			Extract e = new Extract();
+			for(gate.Document gateDoc: gateDocList){	
+				e.extractAnnotatedSpan(gateDoc, in.getType(), "passOne"); //gateDoc, label/type, annotationSet where the annotations are stored.
+			}
+			/* TESTING
+			 *///System.out.println(in.getType() + " dictionary size: " +e.getDictionary().size());
+			setPatientLevelDictionaries(e.getDictionary());
+			run(in.getType(), in.getCategory(), in.getFilter());
+		}
+	}
+
+	/**
+	 * Run second pass: annotate every whole-word, case-insensitive occurrence of a dictionary term
+	 * in the documents of the same patient.
+	 * 
+	 * @param label annotation type to create
+	 * @param category value of the CATEGORY feature
+	 * @param filter regular expression; terms that match it completely are not propagated
+	 */
+	private static void run(String label, String category, String filter)
+	{			
+		//compiled once; the same filter is applied to every term of every document
+		Pattern filterPattern = Pattern.compile(filter);
+		
+		for(gate.Document gd: gateDocList)
+		{ 	
+			//create new annotations in the 'passTwo' annotationSet
+			//set annotation feature: CATEGORY
+			gate.FeatureMap gateMap = Factory.newFeatureMap();
+			gateMap.put("CATEGORY", category); 
+			
+			/* TESTING
+			 */  //System.out.println(gd.getName() +"\t"+ patientKey(gd.getName())); 
+				 //System.out.println(dictionary.size());
+
+			//sanity check document contains initial/pass-one predictions
+			String[] ptLevelDictionary = dictionary.get(patientKey(gd.getName()));
+			if(ptLevelDictionary == null)
+				continue;
+
+			//get text 
+			String gateDocText = gd.getContent().toString();
+			
+			//NB: this may create overlapping annotations
+			for(String term: ptLevelDictionary)
+			{	
+				//the filter is tested against the plain term; testing the wrapped expression
+				//("\bterm\b") could never match a filter word
+				if(filterPattern.matcher(term).matches())
+					continue;
+				
+				//Pattern.quote: the term is extracted text, every character has to be matched literally
+				Pattern p = Pattern.compile("\\b" + Pattern.quote(term) + "\\b", Pattern.CASE_INSENSITIVE);
+				Matcher m = p.matcher(gateDocText);
+
+				//for each match create a new annotation
+				while(m.find()){
+					addAnnotation(gd, gateMap, label, m.start(), m.end());
+				}
+			}
+			//remove overlaps that may be propagated by the dictionary matching
+			//TODO: proper longest match prediction would make this step unnecessary.
+			Overlap.rmOverlap(gd, label, "passTwo");
+			if(label.equals("PATIENT")||label.equals("DOCTOR")) 			
+				Overlap.rmOverlap(gd, label, "passTwo"); //second sweep for the two name types
+		}
+	}
+	
+	/**
+	 * Derive the patient-id from a document name: the part before the first '-'
+	 * (L100-xx.xml -> L100). A name without '-' is used as a whole instead of failing.
+	 * 
+	 * @param docName document name
+	 * @return patient-id used as dictionary key
+	 */
+	private static String patientKey(String docName){
+		int dash = docName.indexOf("-");
+		return dash < 0 ? docName : docName.substring(0, dash);
+	}
+	
+	/**
+	 * Create gate annotation.
+	 * 
+	 * @param gateDoc gate.Document
+	 * @param gateMap features of the new annotation
+	 * @param label annotation type
+	 * @param start offset
+	 * @param end offset
+	 */
+	private static void addAnnotation(gate.Document gateDoc, gate.FeatureMap gateMap, String label, int start, int end)
+	{
+		try {
+			gateDoc.getAnnotations("passTwo").add((long)start, (long)end, label, gateMap);
+		} catch (InvalidOffsetException e) {
+			e.printStackTrace();
+		}
+	}
+
+	
+	private static void setPatientLevelDictionaries(LinkedHashMap<String, String> listDictionary){
+		PassTwo.dictionary = getAllPatientLevelDictionaries(listDictionary);
+	}
+	
+	/**
+	 * Pre-process 'raw' dictionaries compiled (with Extract.class) using pass-one annotations.
+	 * 
+	 * 1. convert LinkedHashMap<String, String> to LinkedHashMap<String, String[]>
+	 * 2. split on newline characters, trim each entry and drop empty entries
+	 * 3. remove duplicate dictionary entries (first occurrence wins, order is kept)
+	 * 
+	 * The entries stay plain text; run(...) quotes them and adds the word boundaries when it
+	 * builds the expression to match.
+	 * 
+	 * @param listDictionary k: patient-id, v: extracted NEs separated by '\n'
+	 * @return ready-to-use run-time dictionaries, k: patient-id, v: corresponding dictionary
+	 */
+	private static LinkedHashMap<String, String[]> getAllPatientLevelDictionaries(LinkedHashMap<String, String> listDictionary){
+		
+		LinkedHashMap<String, String[]> allPtDictionaries = new LinkedHashMap<String, String[]>();
+		for(String k: listDictionary.keySet()){			
+			//a set removes duplicates; building a new collection avoids removing from a list
+			//while indexing into it, which skips the entry following each removed one
+			LinkedHashSet<String> terms = new LinkedHashSet<String>();
+			for(String raw: listDictionary.get(k).split("\n")){
+				String term = raw.trim();
+				if(term.length() > 0)
+					terms.add(term);
+			}
+			allPtDictionaries.put(k, terms.toArray(new String[0]));
+		}
+		return allPtDictionaries;
+	}
+	
+}
+
+/**
+ * A class to compile an initial 'raw' dictionary.
+ */
+class Extract {
+
+	static LinkedHashMap<String, String> ListDictionary;
+	
+	public Extract()
+	{
+		ListDictionary = new LinkedHashMap<String, String>();
+	}
+	
+	/**
+	 * Extract annotations at the patient-level using file names as ID/Key and construct a 'raw' dictionary.
+	 * 
+	 * @param gateDoc gate.Document
+	 * @param type entity type label 
+	 * @param annSet annotation set where firstPass annotations are stored.
+	 */
+	public void extractAnnotatedSpan(gate.Document gateDoc, String type, String annSet)
+	{
+		AnnotationSet as = gateDoc.getAnnotations(annSet).get(type);
+		String tkey = gateDoc.getName().substring(0, gateDoc.getName().indexOf("-"));//L100-xx.xml -> L100
+		
+		/*  TESTING
+		 */ //System.out.println( type + "\t fileID:" + tkey + "\t AnnSet.size():" + as.size());	
+		
+		for(Annotation a: as)
+		{
+			if(!ListDictionary.containsKey(tkey))
+				ListDictionary.put(tkey,  "");
+			
+			if(ListDictionary.get(tkey).equals("")) //if empty
+				ListDictionary.put(tkey, gate.Utils.stringFor(gateDoc, a));
+			else
+				ListDictionary.put(tkey, ListDictionary.get(tkey) + "\n" + gate.Utils.stringFor(gateDoc, a));
+			
+		}		
+	}
+
+	/**
+	 * @return A 'raw' dictionary with k: patient-id , v: extracted annotations separated with "\n"
+	 */
+	public LinkedHashMap<String, String> getDictionary(){
+		return ListDictionary;
+	}
+}
+
+/**
+ * Remove overlapping annotation. Overlap needed for the poor dictionary matching implemented.
+ * 
+ * TODO: handle/remove overlap > 2 annotations
+ */
+class Overlap {
+	/**
+	 * Remove overlapping annotations of one type from an annotation set. Of two overlapping
+	 * annotations the longer one is kept; when both have the same length the one that comes
+	 * first in the sorted list is kept.
+	 * 
+	 * @param gateDoc gate.Document
+	 * @param annType annotation type to process
+	 * @param annSet annotation set to read from and remove from
+	 */
+	public static void rmOverlap(gate.Document gateDoc, String annType, String annSet)
+	{
+		//annType: annotation type to process 
+		//work on a sorted copy; removals are mirrored into the document's annotation set
+		ArrayList<Annotation> ann = new ArrayList<Annotation>(gateDoc.getAnnotations(annSet).get(annType));
+		Collections.sort(ann, gate.Utils.OFFSET_COMPARATOR);
+		
+		for(int i=0;i<ann.size() && i+1 != ann.size();i++){
+			for(int j=i+1;j<ann.size();j++) //j=next, i previous
+			{
+				//overlap test: j starts inside [i.start, i.end] or j ends inside [i.start, i.end];
+				//both bounds are inclusive, so annotations that merely touch count as overlapping
+				if((ann.get(j).getStartNode().getOffset() >= ann.get(i).getStartNode().getOffset() && ann.get(j).getStartNode().getOffset() <= ann.get(i).getEndNode().getOffset()) 
+				   || 
+				   (ann.get(j).getEndNode().getOffset() <= ann.get(i).getEndNode().getOffset() && ann.get(j).getEndNode().getOffset() >= ann.get(i).getStartNode().getOffset()))
+				{
+					//i is at least as long as j: drop j
+					if((ann.get(i).getEndNode().getOffset() - ann.get(i).getStartNode().getOffset()) >= (ann.get(j).getEndNode().getOffset() - ann.get(j).getStartNode().getOffset())){	
+						gateDoc.getAnnotations(annSet).remove(ann.remove(j));
+							j--; //the following element moved into position j; look at it next
+					}
+					else{
+						//j is longer: drop i
+						//NOTE(review): after this removal the list has shifted, so index i now refers to
+						//the next annotation while j is left unchanged and the inner loop simply goes on -
+						//confirm this is intended (PassTwo calls rmOverlap a second time for some types).
+						gateDoc.getAnnotations(annSet).remove(ann.remove(i));
+					}
+				}
+			}
+		}
+	}	
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java?rev=1712083&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/co/dehghan/cdeid/ui/Deid.java Mon Nov  2 16:59:48 2015
@@ -0,0 +1,67 @@
+package co.dehghan.cdeid.ui;
+
+import gate.creole.ResourceInstantiationException;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URISyntaxException;
+
+import co.dehghan.cdeid.controller.Controller;
+
+/**
+ * cDeid  Copyright (C) 2015  Azad Dehghan
+ * 
+ * User interface.
+ */
+public class Deid {
+
+	public static void main(String[] args) throws URISyntaxException, IOException, ResourceInstantiationException
+	{	
+		try {
+			parseCmdLine(args);
+		} catch (IOException e) {
+			System.err.println(e.getMessage());
+			usage();
+		}
+	}
+	
+	/**
+	 * print usage information.
+	 * 
+	 * @return
+	 */
+	private static String usage()
+	{
+		return "\n****************************************************************************************" +
+				"\n cDeid  Copyright (C) 2015  Azad Dehghan, v.0.1 (US)\n\n"
+
+				+ "Usage: java -jar Deid.jar [--xml|--gatexml] <input_dir> <output_dir>\n\n" +
+				"*****************************************************************************************\n";
+	}
+	
+	private static void parseCmdLine(String[] args) throws IOException, ResourceInstantiationException
+	{
+		Controller controller = new Controller();
+		
+		if(args.length < 2 || args.length > 3){
+			System.out.println(usage());
+		}
+		else if(args[0].toLowerCase().contentEquals("--gatexml"))
+		{	
+			String r_corpus = args[1];
+			String w_output = args[2];
+			controller.run(new File(r_corpus), new File(w_output), "gatexml");
+		} 
+		else if(args[0].toLowerCase().contentEquals("--xml"))
+		{	
+			String r_corpus = args[1];
+			String w_output = args[2];				
+			controller.run(new File(r_corpus), new File(w_output), "xml");
+		}
+		else if(args[0].toLowerCase().contentEquals("--help")||args[0].toLowerCase().contentEquals("-h"))
+			System.out.println(usage());
+		else
+			System.err.println(usage());
+	}
+}
+



Mime
View raw message