Author: mreutegg Date: Fri Feb 6 10:48:56 2009 New Revision: 741524 URL: http://svn.apache.org/viewvc?rev=741524&view=rev Log: JCR-1970: populate.jsp uses Java 1.5 method Modified: jackrabbit/trunk/jackrabbit-webapp/src/main/webapp/populate.jsp Modified: jackrabbit/trunk/jackrabbit-webapp/src/main/webapp/populate.jsp URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-webapp/src/main/webapp/populate.jsp?rev=741524&r1=741523&r2=741524&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-webapp/src/main/webapp/populate.jsp (original) +++ jackrabbit/trunk/jackrabbit-webapp/src/main/webapp/populate.jsp Fri Feb 6 10:48:56 2009 @@ -13,30 +13,30 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ---%><%@ page import="org.apache.jackrabbit.j2ee.RepositoryAccessServlet, - org.apache.jackrabbit.util.Text, - javax.jcr.Node, - javax.jcr.Repository, - javax.jcr.Session, - javax.jcr.SimpleCredentials, - javax.swing.text.AttributeSet, - javax.swing.text.html.HTML, - javax.swing.text.html.HTMLDocument, - javax.swing.text.html.HTMLEditorKit, - java.io.FilterInputStream, - java.io.IOException, - java.io.InputStream, - java.io.InputStreamReader, - java.net.URL, - java.net.URLConnection, - java.net.URLDecoder, - java.net.URLEncoder, - java.util.ArrayList, - java.util.Arrays, - java.util.Calendar, - java.util.Collections, - java.util.Iterator, - java.util.List" +--%><%@ page import="java.io.FilterInputStream, + java.io.IOException, + java.io.InputStream, + java.io.InputStreamReader, + java.net.URL, + java.net.URLConnection, + java.net.URLDecoder, + java.net.URLEncoder, + java.util.ArrayList, + java.util.Arrays, + java.util.Calendar, + java.util.Collections, + java.util.Iterator, + java.util.List, + javax.jcr.Node, + javax.jcr.Repository, + javax.jcr.Session, + 
javax.jcr.SimpleCredentials, + javax.swing.text.AttributeSet, + javax.swing.text.html.HTML, + javax.swing.text.html.HTMLDocument, + javax.swing.text.html.HTMLEditorKit, + org.apache.jackrabbit.j2ee.RepositoryAccessServlet, + org.apache.jackrabbit.util.Text" %><%@ page contentType="text/html;charset=UTF-8" %><% Repository rep; Session jcrSession; @@ -66,9 +66,9 @@ } String[] types = request.getParameterValues("filetype"); if (types != null) { - for (int i = 0; i < types.length; i++) { - filetypes.add(types[i]); - } + filetypes.addAll(Arrays.asList(types)); + } else { + filetypes = DEFAULT_TYPES; } if (seedWord != null && numDocs > 0 && filetypes.size() > 0) { %> @@ -147,7 +147,7 @@ String type = (String) filetypes.get(typeIdx); int offset = 0; while (n < numDocs * (typeIdx + 1) / filetypes.size()) { - final URL[] urls = new Search(type, seedWord, offset).getURLs(out); + final URL[] urls = new Search(type, seedWord, offset).getURLs(); if (urls.length == 0) { break; } @@ -185,7 +185,6 @@ try { String info = fileName + " (" + host + ")"; URLConnection con = currentURL.openConnection(); - con.setReadTimeout(10000); // 10 seconds InputStream in = con.getInputStream(); try { synchronized (fOut) { @@ -261,7 +260,7 @@

- + @@ -275,21 +274,9 @@ } } %><%! - public Iterator getDocuments(String mimeType, String searchTerm) { - return new Iterator() { - public boolean hasNext() { - return false; - } - public Object next() { - return null; - } - - public void remove() { - throw new UnsupportedOperationException(); - } - }; - } + public static final List DEFAULT_TYPES = Arrays.asList( + new String[]{"pdf", "rtf", "doc", "ppt", "xls"}); public static class Search { @@ -305,41 +292,37 @@ this.start = start; } - public URL[] getURLs(JspWriter out) throws Exception { + public URL[] getURLs() throws Exception { + List urls = new ArrayList(); + String query = term + " filetype:" + filetype; + URL google = new URL("http://www.google.com/search?q=" + + URLEncoder.encode(query, "UTF-8") + "&start=" + start); + URLConnection con = google.openConnection(); + con.setRequestProperty("User-Agent", ""); + InputStream in = con.getInputStream(); try { - List urls = new ArrayList(); - String query = term + " filetype:" + filetype; - URL google = new URL("http://www.google.com/search?q=" + - URLEncoder.encode(query, "UTF-8") + "&start=" + start); - URLConnection con = google.openConnection(); - con.setRequestProperty("User-Agent", ""); - InputStream in = con.getInputStream(); - try { - HTMLEditorKit kit = new HTMLEditorKit(); - HTMLDocument doc = new HTMLDocument(); - doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE); - kit.read(new InputStreamReader(in, "UTF-8"), doc, 0); - HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A); - while (it.isValid()) { - AttributeSet attr = it.getAttributes(); - if (attr != null) { - String href = (String) attr.getAttribute(HTML.Attribute.HREF); - if (href != null && href.endsWith("." 
+ filetype)) { - URL url = new URL(new URL("http", "www.google.com", "dummy"), href); - if (url.getHost().indexOf("google") == -1) { - urls.add(url); - } + HTMLEditorKit kit = new HTMLEditorKit(); + HTMLDocument doc = new HTMLDocument(); + doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE); + kit.read(new InputStreamReader(in, "UTF-8"), doc, 0); + HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A); + while (it.isValid()) { + AttributeSet attr = it.getAttributes(); + if (attr != null) { + String href = (String) attr.getAttribute(HTML.Attribute.HREF); + if (href != null && href.endsWith("." + filetype)) { + URL url = new URL(new URL("http", "www.google.com", "dummy"), href); + if (url.getHost().indexOf("google") == -1) { + urls.add(url); } } - it.next(); } - } finally { - in.close(); + it.next(); } - return (URL[]) urls.toArray(new URL[urls.size()]); - } catch (Exception e) { - throw e; + } finally { + in.close(); } + return (URL[]) urls.toArray(new URL[urls.size()]); } }
Seed word:
Seed word (optional):
Number of documents:
Document types: Adobe Acrobat PDF
Rich Text Format
Microsoft Word
Microsoft PowerPoint
Microsoft Excel