opennlp-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jörn Kottmann <kottm...@gmail.com>
Subject Re: svn commit: r1145449 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect: SentenceDetectorCrossValidatorTool.java SentenceDetectorEvaluatorTool.java
Date Tue, 12 Jul 2011 11:43:13 GMT
I did the same for Charset, but unlike a File, that conversion can also go wrong.

ArgumentParser.validateArguments now checks that the passed arguments 
are valid
in terms of the requested arguments.

ArgumentParser.parse now throws a TerminateToolException with an error 
message
when the argument value is invalid, e.g. it is not a number or the charset 
does not exist.

Jörn

On 7/12/11 10:41 AM, Jörn Kottmann wrote:
> Hi William,
>
> the code looks good, I think we should improve the Argument Parser 
> also a little
> to make it more convenient to use.
>
> You need to write this:
>
> File trainingDataInFile = new File(params.getData());
>
>
> I will extend the Argument Parser to also understand "File" as a return
> type then you can write:
>
> File trainingDataInFile = params.getData();
>
>
> In the long term I would like to do much more parameter checking and 
> validation
> with annotations, because this is a good way to avoid code duplication.
>
> Jörn
>
> On 7/12/11 6:10 AM, colen@apache.org wrote:
>> Author: colen
>> Date: Tue Jul 12 04:10:51 2011
>> New Revision: 1145449
>>
>> URL: http://svn.apache.org/viewvc?rev=1145449&view=rev
>> Log:
>> OPENNLP-221 Refactored the evaluator and cross validator CLI tools of 
>> the SentenceDetector to use the Parameters interface. Please review. 
>> If it is OK I will do the same with the other tools
>>
>> Modified:
>>      
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
>>      
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java
>>
>> Modified: 
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
>> URL: 
>> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java?rev=1145449&r1=1145448&r2=1145449&view=diff
>> ============================================================================== 
>>
>> --- 
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java

>> (original)
>> +++ 
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java

>> Tue Jul 12 04:10:51 2011
>> @@ -19,7 +19,11 @@ package opennlp.tools.cmdline.sentdetect
>>
>>   import java.io.File;
>>   import java.io.IOException;
>> +import java.nio.charset.Charset;
>>
>> +import opennlp.tools.cmdline.ArgumentParser;
>> +import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
>> +import opennlp.tools.cmdline.BasicTrainingParametersI;
>>   import opennlp.tools.cmdline.CLI;
>>   import opennlp.tools.cmdline.CmdLineTool;
>>   import opennlp.tools.cmdline.CmdLineUtil;
>> @@ -30,6 +34,16 @@ import opennlp.tools.util.ObjectStream;
>>   import opennlp.tools.util.eval.FMeasure;
>>
>>   public final class SentenceDetectorCrossValidatorTool implements 
>> CmdLineTool {
>> +
>> +  /**
>> +   * Create a list of expected parameters.
>> +   */
>> +  interface Parameters extends BasicTrainingParametersI {
>> +
>> +    @ParameterDescription(valueName = "data")
>> +    String getData();
>> +
>> +  }
>>
>>     public String getName() {
>>       return "SentenceDetectorCrossValidator";
>> @@ -40,40 +54,37 @@ public final class SentenceDetectorCross
>>     }
>>
>>     public String getHelp() {
>> -    return "Usage: " + CLI.CMD + " " + getName() + " " + 
>> TrainingParameters.getParameterUsage() +
>> -        " -data trainData\n" +
>> -        TrainingParameters.getDescription();
>> +    return "Usage: " + CLI.CMD + " " + getName() + " " + 
>> ArgumentParser.createUsage(Parameters.class);
>>     }
>>
>>     public void run(String[] args) {
>> -    if (args.length<  5) {
>> -      System.out.println(getHelp());
>> +
>> +    if (!ArgumentParser.validateArguments(args, Parameters.class)) {
>> +      System.err.println(getHelp());
>>         throw new TerminateToolException(1);
>>       }
>>
>> -    TrainingParameters parameters = new TrainingParameters(args);
>> +    Parameters params = ArgumentParser.parse(args, Parameters.class);
>>
>> -    if(!parameters.isValid()) {
>> -      System.out.println(getHelp());
>> -      throw new TerminateToolException(1);
>> -    }
>>
>>       opennlp.tools.util.TrainingParameters mlParams =
>> -      
>> CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args), 
>> false);
>> +      CmdLineUtil.loadTrainingParameters(params.getParams(), false);
>>
>> -    File trainingDataInFile = new 
>> File(CmdLineUtil.getParameter("-data", args));
>> +    File trainingDataInFile = new File(params.getData());
>>       CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
>>
>> +    Charset encoding = Charset.forName(params.getEncoding());
>> +
>>       ObjectStream<SentenceSample>  sampleStream = 
>> SentenceDetectorTrainerTool.openSampleData("Training Data",
>> -        trainingDataInFile, parameters.getEncoding());
>> +        trainingDataInFile, encoding);
>>
>>       SDCrossValidator validator;
>>
>>       if (mlParams == null) {
>> -      validator = new SDCrossValidator(parameters.getLanguage(), 
>> parameters.getCutoff(), parameters.getNumberOfIterations());
>> +      validator = new SDCrossValidator(params.getLang(), 
>> params.getCutoff(), params.getIterations());
>>       }
>>       else {
>> -      validator = new SDCrossValidator(parameters.getLanguage(), 
>> mlParams);
>> +      validator = new SDCrossValidator(params.getLang(), mlParams);
>>       }
>>
>>       try {
>>
>> Modified: 
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java
>> URL: 
>> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java?rev=1145449&r1=1145448&r2=1145449&view=diff
>> ============================================================================== 
>>
>> --- 
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java

>> (original)
>> +++ 
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java

>> Tue Jul 12 04:10:51 2011
>> @@ -21,6 +21,9 @@ import java.io.File;
>>   import java.io.IOException;
>>   import java.nio.charset.Charset;
>>
>> +import opennlp.tools.cmdline.ArgumentParser;
>> +import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
>> +import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
>>   import opennlp.tools.cmdline.CLI;
>>   import opennlp.tools.cmdline.CmdLineTool;
>>   import opennlp.tools.cmdline.CmdLineUtil;
>> @@ -31,6 +34,22 @@ import opennlp.tools.sentdetect.Sentence
>>   import opennlp.tools.util.ObjectStream;
>>
>>   public final class SentenceDetectorEvaluatorTool implements 
>> CmdLineTool {
>> +
>> +  /**
>> +   * Create a list of expected parameters.
>> +   */
>> +  interface Parameters {
>> +
>> +    @ParameterDescription(valueName = "charsetName", description = 
>> "specifies the encoding which should be used for reading and writing 
>> text")
>> +    @OptionalParameter(defaultValue="UTF-8")
>> +    String getEncoding();
>> +
>> +    @ParameterDescription(valueName = "model")
>> +    String getModel();
>> +
>> +    @ParameterDescription(valueName = "data")
>> +    String getData();
>> +  }
>>
>>     public String getName() {
>>       return "SentenceDetectorEvaluator";
>> @@ -41,25 +60,28 @@ public final class SentenceDetectorEvalu
>>     }
>>
>>     public String getHelp() {
>> -    return "Usage: " + CLI.CMD + " " + getName() + " -encoding 
>> charset -model model -data testData";
>> +    return "Usage: " + CLI.CMD + " " + getName() + " " + 
>> ArgumentParser.createUsage(Parameters.class);
>>     }
>>
>>     public void run(String[] args) {
>> -    if (args.length != 6) {
>> -      System.out.println(getHelp());
>> +
>> +    if (!ArgumentParser.validateArguments(args, Parameters.class)) {
>> +      System.err.println(getHelp());
>>         throw new TerminateToolException(1);
>>       }
>>
>> -    Charset encoding = CmdLineUtil.getEncodingParameter(args);
>> +    Parameters params = ArgumentParser.parse(args, Parameters.class);
>> +
>> +    Charset encoding = Charset.forName(params.getEncoding());
>>
>>       if (encoding == null) {
>>         System.out.println(getHelp());
>>         throw new TerminateToolException(1);
>>       }
>>
>> -    SentenceModel model = new SentenceModelLoader().load(new 
>> File(CmdLineUtil.getParameter("-model", args)));
>> +    SentenceModel model = new SentenceModelLoader().load(new 
>> File(params.getModel()));
>>
>> -    File trainingDataInFile = new 
>> File(CmdLineUtil.getParameter("-data", args));
>> +    File trainingDataInFile = new File(params.getData());
>>       CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
>>
>>       opennlp.tools.sentdetect.SentenceDetectorEvaluator evaluator =
>>
>>
>


Mime
View raw message