uima-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ISAO Isaac Saito" <1...@1995.sfc.ne.jp>
Subject Re: Question: Array of String as a feature
Date Tue, 20 Nov 2007 07:36:54 GMT
Thanks Marshall,
I attach the stack trace and java code below.

<exception>
Exception in thread "main" java.lang.NullPointerException
	at org.apache.uima.cas.impl.CASImpl.ll_getFSForRef(CASImpl.java:2745)
	at jp.ac.keio.dmc.context.srw.types.PersonWikip.getCompany_related(PersonWikip.java:233)
	at jp.ac.keio.dmc.context.srw.SRWikipediaMain.extractNeeFromText(SRWikipediaMain.java:120)
	at jp.ac.keio.dmc.context.srw.SRWikipediaMain.extractNeeFromWebdoc(SRWikipediaMain.java:85)
	at jp.ac.keio.dmc.context.srw.SRWikipediaMain.extractNamedentity(SRWikipediaMain.java:67)
	at jp.ac.keio.dmc.context.srw.SRWikipediaMain.<init>(SRWikipediaMain.java:47)
	at jp.ac.keio.dmc.context.srw.SRWikipediaMain.main(SRWikipediaMain.java:210)
</exception>


- PersonWikip.java
I have not attached it because it is nothing special: it was generated
automatically after defining the type system in the Component
Descriptor Editor.


- whole code of SRWikipediaMain.java
# You might find the code verbose because I pasted the whole file...

<SRWikipediaMain.java>
1: package jp.ac.keio.dmc.context.srw;
2:
3: import java.io.IOException;
4: import java.util.LinkedList;
5:
6: import jp.ac.keio.dmc.context.srw.types.PersonWikip;
7: import jp.ac.keio.dmc.context.util.http.DMCHttpClient;
8: import net.lateeye.util.uima.AnalysisEngineUtil;
9:
10: import org.apache.uima.analysis_engine.AnalysisEngine;
11: import org.apache.uima.cas.FSIndex;
12: import org.apache.uima.cas.FSIterator;
13: import org.apache.uima.jcas.JCas;
14: import org.apache.uima.jcas.cas.StringArray;
15: import org.apache.uima.util.XMLInputSource;
16:
17: import com.ibm.omnifind.ne.types.Company;
18: import com.ibm.omnifind.ne.types.Org;
19:
20: public class SRWikipediaMain {
21: 	private static String path_eclipseWorkspace_root =
"C:/data/app/workspace_eclipse/";
22: 	private static String path_ibmNeeDesc = path_eclipseWorkspace_root
23: 			+ "tae_nee_apacheUima/" + "desc/";
24: 	private static String path_srwAeDesc = path_eclipseWorkspace_root
25: 			+ "srwikipedia/" + "descriptor/";
26: 	private static String uimaDesc_personWikip = path_srwAeDesc
27: 			+ "KeioWikipAnnotatorDescriptor.xml";
28: 	private static String uimaDesc_ibmNee = path_ibmNeeDesc
29: 			+ "NamedEntityAnnotatorDescriptor.xml";
30: 	private static String uimaDesc_wikipPersonNee = path_srwAeDesc
31: 			+ "WikipPersonNeeAnnotatorDescriptor.xml";
32: 	// private LinkedList<PersonWikip> personList = null;
33: 	private DMCHttpClient dhc = new DMCHttpClient();
34:
35: 	public SRWikipediaMain() {
36: 		// extract keio person from wikipedia
37: 		LinkedList<PersonWikip> personList = this.extractKeioPersonWikipedia();
38:
39: 		// extract NEs using personList
40: 		personList = this.extractNamedentity(personList);
41:
42: 		// update 2 RDB
43: 		this.update_SRW_db(personList);
44: 	}
45:
46: 	private void update_SRW_db(LinkedList<PersonWikip> persons) {
47: 		// TODO Auto-generated method stub
48: 	}
49:
50: 	private LinkedList<PersonWikip> extractNamedentity(
51: 			LinkedList<PersonWikip> persons) {
52:
53: 		LinkedList<PersonWikip> newList = new LinkedList<PersonWikip>();
54: 		for (int i = 0; i < persons.size(); i++) {
55: 			PersonWikip person = persons.get(i);
56:
57: 			System.out.println("SRWikipediaMain:extractNE: "
58: 					+ person.getPersonname().getLex());
59:
60: 			person = this.extractNeeFromWebdoc(person, person.getUrl_wikip());
61:
62: 			// 2007nov19/130s/for saving memory
63: 			// newList.add(person);
64: 			person = null;
65: 		}
66: 		return newList;
67: 	}
68:
69: 	private PersonWikip extractNeeFromWebdoc(PersonWikip person, String uri) {
70:
71: 		String htmlBody = null;
72: 		try {
73: 			htmlBody = this.dhc.doGetBody(uri);
74: 		} catch (IllegalArgumentException e) {
75: 			// occur when the person name is not enclosed by <a> tag.
76: 		}
77:
78: 		return this.extractNeeFromText(person, htmlBody);
79: 	}
80:
81: 	private PersonWikip extractNeeFromText(PersonWikip person, String
htmlBody) {
82: 		if (htmlBody != null) {
83: 			// read component descriptor for IBM NEE
84: 			XMLInputSource in = null;
85: 			try {
86: 				in = new XMLInputSource(SRWikipediaMain.uimaDesc_ibmNee);
87: 			} catch (IOException e) {
88: 				e.printStackTrace();
89: 			}
90:
91: 			// give html to analysis engine
92: 			AnalysisEngine ae = AnalysisEngineUtil.getAeInstance(in);
93: 			JCas jcas = AnalysisEngineUtil
94: 					.analyzeByAnalysisEngine(ae, htmlBody);
95:
96: 			// set firm
97: 			System.out.print(" - firm: ");
98:
99: 			FSIndex index = jcas.getJFSIndexRepository().getAnnotationIndex(
100: 					Company.type);
101: 			FSIterator compIter = index.iterator();
102:
103: 			StringArray compArray = new StringArray(jcas, index.size());
104: 			int num_company = 0;
105: 			while (compIter.hasNext()) {
106: 				Company annot = (Company) compIter.next();
107: 				compArray.set(num_company, annot.getLex());
108: 				// TODO remove redundunt values
109:
110: 				num_company++;
111: 			}
112: 			person.setCompany_related(compArray);
113: 			System.out.print(person.getCompany_related().toArray() + " ");
114:
115: 			// set org
116: 			System.out.println("");
117: 			System.out.print(" - org: ");
118:
119: 			FSIterator orgIter = jcas.getJFSIndexRepository()
120: 					.getAnnotationIndex(Org.type).iterator();
121: 			StringArray orgArray = new StringArray(jcas, 100);
122: 			int order_org = 0;
123: 			while (orgIter.hasNext()) {
124: 				Org annot = (Org) orgIter.next();
125: 				orgArray.set(order_org, annot.getLex());
126:
127: 				order_org++;
128: 			}
129: 			person.setCompany_related(orgArray);
130: 			System.out.print(person.getOrg_related().toArray() + " ");
131:
132: 			// read component descriptor for WikipPerson NEE
133: 			in = null;
134: 			jcas = null;
135: 			try {
136: 				in = new XMLInputSource(SRWikipediaMain.uimaDesc_wikipPersonNee);
137: 			} catch (IOException e) {
138: 				e.printStackTrace();
139: 			}
140: 			// give html to analysis engine
141: 			jcas = AnalysisEngineUtil.analyzeByAnalysisEngine(
142: 					AnalysisEngineUtil.getAeInstance(in), htmlBody);
143:
144: 			// set faculty
145: 			// set schoold graduated
146: 			// set pref. candidate from
147: 			System.out.println("");
148: 		}
149:
150: 		// TODO write 2 data base
151:
152: 		return person;
153: 	}
154:
155: 	private String url_keiopersons_wikipedia =
"http://ja.wikipedia.org/wiki/%E6%85%B6%E6%87%89%E7%BE%A9%E5%A1%BE%E5%A4%A7%E5%AD%A6%E3%81%AE%E4%BA%BA%E7%89%A9%E4%B8%80%E8%A6%A7";
156:
157: 	private LinkedList<PersonWikip> extractKeioPersonWikipedia() {
158:
159: 		XMLInputSource in = null;
160: 		try {
161: 			in = new XMLInputSource(SRWikipediaMain.uimaDesc_personWikip);
162: 		} catch (IOException e) {
163: 			e.printStackTrace();
164: 		}
165: 		// create a JCas, given an Analysis Engine (ae)
166: 		AnalysisEngine ae = AnalysisEngineUtil.getAeInstance(in);
167: 		String docBody = this.dhc.doGetBody(this.url_keiopersons_wikipedia);
168: 		JCas jcas = AnalysisEngineUtil.analyzeByAnalysisEngine(ae, docBody);
169:
170: 		LinkedList<PersonWikip> personList = this.createPersonList(jcas);
171: 		System.out.println("extractKeioPersonWikipedia: " + personList.size());
172:
173: 		// 2007nov15/130s/this caused fatal error!!!
174: 		// jcas.reset();
175:
176: 		return personList;
177: 	}
178:
179: 	private LinkedList<PersonWikip> createPersonList(JCas jcas) {
180: 		LinkedList<PersonWikip> persons = new LinkedList<PersonWikip>();
181:
182: 		FSIterator personIter = jcas.getJFSIndexRepository()
183: 				.getAnnotationIndex(PersonWikip.type).iterator();
184: 		int counter = 0;
185: 		while (personIter.hasNext()) {
186: 			PersonWikip p = (PersonWikip) personIter.next();
187: 			persons.add(p);
188: 			System.out
189: 					.println("SRWikipediaMain:createPersonList: "
190: 							+ p.getPersonname().getLex() + ", URL: "
191: 							+ p.getUrl_wikip());
192: 			counter++;
193: 		}
194: 		return persons;
195: 	}
196:
197: 	public static void main(String[] args) {
198: 		new SRWikipediaMain();
199: 	}
200: }
</SRWikipediaMain.java>

Isaac


On 11/20/07, Marshall Schor <msa@schor.com> wrote:
> Can you please post the stack trace so we can see the nesting of method
> calls leading to the null pointer exception?
>
> Thanks. -Marshall
>
> ISAO Isaac Saito wrote:
> > Thilo,
> >
> > Thank you for the comment.
> > I took the following steps but the result is the same.
> >
> > 1. set /uimaj-2.2.0-incubating-bin/apache-uima/lib as a classpath
> > 2. clean compiled classes and build
> >
> > any comments will be appreciated.
> >
> > Isaac
> >
> > On 11/20/07, Thilo Goetz <twgoetz@gmx.de> wrote:
> >
> >> Hi,
> >>
> >> this looks like you may be referring to an old version
> >> of your JCas classes in your classpath.  The invocation
> >> exception may be caused because the old version of the
> >> Person class doesn't have the company feature yet.  Please
> >> check your classpath and make sure the classes you run
> >> with are the same ones you compile against.
> >>
> >> --Thilo
> >>
> >> ISAO Isaac Saito wrote:
> >>
> >>> Hi all,
> >>>
> >>> Does anybody have a solution to the problem I describe below?
> >>>
> >>> Thanks in adv,
> >>>  Isaac
> >>>
> >>>
> >>> - What I Want:
> >>>  set an array of String as a feature and get them
> >>>
> >>>
> >>> - Phenomenon:
> >>>
> >>>  -- Exception occurs and application terminates after executing
> >>> System.out.print(person.getCompany_related().toArray())
> >>>
> >>>  Exception in thread "main" java.lang.NullPointerException
> >>>       at org.apache.uima.cas.impl.CASImpl.ll_getFSForRef(CASImpl.java:2745)
> >>>
> >>>
> >>>  -- the content of variable shows "com.sun.jdi.InvocationException
> >>> occurred invoking method." after executing
> >>> person.setCompany_related(compArray).
> >>>   This can be seen by using Eclipse's debug mode.
> >>>
> >>>
> >>> - java code
> >>>
> >>> private void extractNeeFromText(String descpath, String htmlBody,
> >>> Annotation person) {
> >>>       XMLInputSource in = null;
> >>>       try {
> >>>               in = new XMLInputSource(descpath);
> >>>       } catch (IOException e) {
> >>>               e.printStackTrace();
> >>>       }
> >>>       AnalysisEngine ae = AnalysisEngineUtil.getAeInstance(in);
> >>>       ResourceSpecifier specifier = null;
> >>>       AnalysisEngine ae = null;
> >>>       try {
> >>>               specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(
> >>>                                       desc);
> >>>               ae = UIMAFramework.produceAnalysisEngine(specifier);
> >>>       } catch (InvalidXMLException e) {
> >>>               e.printStackTrace();
> >>>       } catch (ResourceInitializationException e) {
> >>>               e.printStackTrace();
> >>>       }
> >>>               JCas jcas = AnalysisEngineUtil.analyzeByAnalysisEngine(ae,
htmlBody);
> >>>       FSIndex index = jcas.getJFSIndexRepository().getAnnotationIndex(
> >>>                               Company.type);
> >>>       StringArray compArray = new StringArray(jcas, index.size());
> >>>       FSIterator compIter = index.iterator();
> >>>       int num_company = 0;
> >>>       while (compIter.hasNext()) {
> >>>               Company annot = (Company) compIter.next();
> >>>               compArray.set(num_company, annot.getLex());
> >>>               // TODO remove redundunt values
> >>>               num_company++;
> >>>       }
> >>>       person.setCompany_related(compArray);
> >>>       System.out.println(person.getCompany_related().toArray());
> >>> }
> >>>
> >>>
> >>> - type system description
> >>>
> >>> <?xml version="1.0" encoding="UTF-8"?>
> >>> <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
> >>> <name>Person-Wikip_TypeSystemDescriptor</name>
> >>> <description></description>
> >>> <version>1.0</version>
> >>> <vendor>ISAAC</vendor>
> >>> <imports>
> >>> <import location="../../tae_nee_apacheUima/desc/NamedEntityTypeSystemDescriptor.xml"/>
> >>> </imports>
> >>> <types>
> >>> <typeDescription>
> >>> <name>jp.ac.keio.dmc.context.srw.types.PersonWikip</name>
> >>> <description>w</description>
> >>> <supertypeName>uima.tcas.Annotation</supertypeName>
> >>> <features>
> >>> <featureDescription>
> >>> <name>company_related</name>
> >>> <description></description>
> >>> <rangeTypeName>uima.cas.StringArray</rangeTypeName>
> >>> </featureDescription>
> >>> </features>
> >>> </typeDescription>
> >>> </types>
> >>> </typeSystemDescription>
> >>>
> >>>
> >>> - Referred conversation
> >>> http://www.ibm.com/developerworks/forums/thread.jspa?threadID=129534
> >>>
> >
> >
> >

Mime
View raw message