uima-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Josep María Formentí Serra <jmforme...@aia.es>
Subject Re: Ruta problem with spaces and special chars
Date Tue, 04 Jul 2017 07:00:43 GMT
Thanks Peter. It's just one class, so I've put the code here and also sent it to you directly.

============================================================================
import static
org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.net.URISyntaxException;

import org.antlr.runtime.RecognitionException;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.ruta.descriptor.RutaBuildOptions;
import org.apache.uima.ruta.descriptor.RutaDescriptorFactory;
import org.apache.uima.ruta.descriptor.RutaDescriptorInformation;
import org.apache.uima.ruta.engine.RutaEngine;
import org.junit.Test;

public class RutaAnnotatorTest {

    // We try to create a Detection because an Attribute (" hello")
contains a
    // detected value ("hello")
    @Test
    public void testSpaceProblem() throws UIMAException, IOException,
RecognitionException, URISyntaxException {
        // Prepare data
        String str = "attr: hello";
        String rutaRule = "PACKAGE ruta;\n" //
                + "DECLARE Detection;\n" //
                + "DECLARE DetectedValue;\n" //
                + "DECLARE Attribute;\n" //
                + "BOOLEAN located;\n" //
                + "BLOCK(doc) Document{} {\n" //
                + "     Document{ -> located = false};\n" //
                + "        a1:Attribute{} -> {" //
                + "            d1:DetectedValue{ -> located = true};" //
                + "        };" //
                + "        Document{located -> CREATE(Detection)};\n"//
                + "}\n"; //

        // Prepare CAS
        RutaDescriptorFactory factory = new RutaDescriptorFactory();
        RutaDescriptorInformation descriptorInformation =
factory.parseDescriptorInformation(rutaRule);
        RutaBuildOptions options = new RutaBuildOptions();
        TypeSystemDescription typeSystemDescription =
factory.createTypeSystemDescription("", descriptorInformation,
                options, null);

        JCas cas = JCasFactory.createJCas(typeSystemDescription);
        cas.setDocumentText(str);

        Type attrType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Attribute");
        AnnotationFS attr = cas.getCas().createAnnotation(attrType, 4, 10);
        cas.addFsToIndexes(attr);

        Type detectedValueType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.DetectedValue");
        AnnotationFS detectedValue =
cas.getCas().createAnnotation(detectedValueType, 5, 10);
        cas.addFsToIndexes(detectedValue);

        // Execute Ruta
        AnalysisEngineDescription ruta =
createEngineDescription(RutaEngine.class, RutaEngine.PARAM_RULES, rutaRule);
        AnalysisEngine pipe = UIMAFramework.produceAnalysisEngine(ruta);
        pipe.process(cas);

        // Validate result
        Type detectionType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Detection");
        assertEquals(1, CasUtil.select(cas.getCas(), detectionType).size());
    }

    // We try to create a Detection because an Attribute ("\" hello\"")
contains a
    // detected value ("llo")
    @Test
    public void testQuoteProblem() throws UIMAException, IOException,
RecognitionException, URISyntaxException {
        // Prepare data
        String str = "attr: \" hello\"";
        String rutaRule = "PACKAGE ruta;\n" //
                + "DECLARE Detection;\n" //
                + "DECLARE DetectedValue;\n" //
                + "DECLARE Attribute;\n" //
                + "BOOLEAN located;\n" //
                + "BLOCK(doc) Document{} {\n" //
                + "     Document{ -> located = false};\n" //
                + "        a1:Attribute{} -> {" //
                + "            d1:DetectedValue{ -> located = true};" //
                + "        };" //
                + "        Document{located -> CREATE(Detection)};\n"//
                + "}\n"; //

        // Prepare CAS
        RutaDescriptorFactory factory = new RutaDescriptorFactory();
        RutaDescriptorInformation descriptorInformation =
factory.parseDescriptorInformation(rutaRule);
        RutaBuildOptions options = new RutaBuildOptions();
        TypeSystemDescription typeSystemDescription =
factory.createTypeSystemDescription("", descriptorInformation,
                options, null);

        JCas cas = JCasFactory.createJCas(typeSystemDescription);
        cas.setDocumentText(str);

        Type attrType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Attribute");
        AnnotationFS attr = cas.getCas().createAnnotation(attrType, 5, 12);
        cas.addFsToIndexes(attr);

        Type detectedValueType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.DetectedValue");
        AnnotationFS detectedValue =
cas.getCas().createAnnotation(detectedValueType, 9, 12);
        cas.addFsToIndexes(detectedValue);

        // Execute Ruta
        AnalysisEngineDescription ruta =
createEngineDescription(RutaEngine.class, RutaEngine.PARAM_RULES, rutaRule);
        AnalysisEngine pipe = UIMAFramework.produceAnalysisEngine(ruta);
        pipe.process(cas);

        // Validate result
        Type detectionType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Detection");
        assertEquals(1, CasUtil.select(cas.getCas(), detectionType).size());
    }

}
============================================================================

2017-07-03 20:50 GMT+02:00 Peter Klügl <peter.kluegl@averbis.com>:

> Hi,
>
>
> I think this mailing list does not allow mail attachments, at least I do
> not see any.
>
>
> Can you upload the tests anywhere and post the links here? Or you can send
> the test directly to me. Or you can create a Jira issue and attach them
> there: https://issues.apache.org/jira/browse/UIMA-5474?jql=project%
> 20%3D%20UIMA%20AND%20component%20%3D%20Ruta
>
>
> Best,
>
>
> Peter
>
>
>
> Am 03.07.2017 um 14:47 schrieb Josep María Formentí Serra:
>
>> Hi,
>>
>>   We've experienced some problems applying rules to texts that contain
>> spaces or special chars: in texts that contain some spaces or special
>> chars the rules are not applied properly.
>>
>>   As an example of these problems I attach 2 tests; these tests use a
>> simplification of the kind of rules that we are using in our project.
>>
>> Best,
>>   JM
>>
>
>


-- 
------------------------------------------------------------------- --- --
- - -
*Grupo AIA* - *www.aia.es <http://www.aia.es> *
Josep Mª Formentí Serra       <jmformenti@aia.ptv.es>
*jmformenti@aia.es <jmformenti@aia.ptv.es>*Dpto. Servicios Financieros y
Seguros
ESADECREAPOLIS, Sant Cugat, Barcelona
Telf.: +34 93 504 49 00 Fax.: +34 93 580 21 88
------------------------------------------------------------------- --- --
- - -
The information transmitted is intended only for the person or entity to
which it is addressed and may contain confidential and/or privileged
material. Any review, retransmission, dissemination or other use of, or
taking of any action in reliance upon, this information by persons or
entities other than the intended recipient is prohibited. If you received
this in error, please contact the sender and delete the material from any
computer.

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message