httpd-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Cunningham Anthony" <anthony...@om.asahi-kasei.co.jp>
Subject Re: [users@httpd] Apache returns 404 not found for non ascii filenames with Java Plugin.
Date Wed, 15 Jan 2003 08:54:06 GMT
Jeff,

I tried java 1.4 and it seemed to work, only partly.

When using java.net.URLEncoder.encode(KanjiURL, "UTF-8"); it will encode the "://" from http://somehost.com
and all subsequent "/" characters into a form that is unacceptable to java.net.URL.
By encoding the non-ascii filename only and then appending it to http://somehost.com/ 
and then passing that into java.net.URL it seems to work.

After getting this to work I searched for UTF-8 on google and found this page:
ftp://ftp.isi.edu/in-notes/rfc2279.txt

There is a java class for encoding unicode strings.
I had to change it slightly to ignore "/" and ":" and then it worked perfectly.
And it works with the 1.3 compiler and plugin.

I'm copying the files below this but they include japanese characters so I'm unsure how they'll
appear in your mail client.
If anyone wants them emailed, please contact me.
these will create two applets.
both applets will have 3 buttons that download a non ascii filename image.
the first button will download using the URL that Apache provides when it lists the file in
an index.
the second button tries a non ascii URL.
the third button tries a UTF-8 encoded URL.

the first and third buttons will show an image; the middle one will not.

************** Extract from Apache Log ************************
127.0.0.1 - - [15/Jan/2003:17:08:42 +0900] "GET /icons/%e6%bc%a2%e5%ad%97.gif HTTP/1.1" 200
242
127.0.0.1 - - [15/Jan/2003:17:08:42 +0900] "GET /icons/漢字.gif HTTP/1.1" 404 283
127.0.0.1 - - [15/Jan/2003:17:13:23 +0900] "GET /icons/%e6%bc%a2%e5%ad%97.gif HTTP/1.1" 200
242
127.0.0.1 - - [15/Jan/2003:17:13:23 +0900] "GET /icons/漢字.gif HTTP/1.1" 404 283
10.51.132.144 - - [15/Jan/2003:17:14:30 +0900] "GET /icons/%e6%bc%a2%e5%ad%97.gif HTTP/1.1"
304 0
10.51.132.144 - - [15/Jan/2003:17:14:30 +0900] "GET /icons/漢字.gif HTTP/1.1" 404 283
10.51.132.144 - - [15/Jan/2003:17:14:30 +0900] "GET /icons/漢字.gif HTTP/1.1" 404 283
10.51.132.144 - - [15/Jan/2003:17:14:30 +0900] "GET /icons/%E6%BC%A2%E5%AD%97.gif HTTP/1.1"
304 0

******************************************************

/* *************        Java 1.4 ***************/ 
import javax.swing.*;

/**
 * Test applet for the Java 1.4 plug-in that tries to download the same
 * non-ASCII-named image from the web server in three different ways and shows
 * each result as the icon of a button:
 * <ul>
 * <li>{@link #ApacheName} (top): the URL is already percent-encoded UTF-8.
 * <li>{@link #KanjiName} (centre): the URL contains the raw kanji characters.
 * <li>{@link #EncodedKanjiName} (bottom): only the file name is encoded with
 *     <code>java.net.URLEncoder.encode(name, "UTF-8")</code> and then appended
 *     to the already-valid prefix.
 * </ul>
 * A button whose download failed simply has no icon.
 */
public class ApacheNonAsciiJavaPlugin1_4 extends JApplet
{
    JButton ApacheName;
    JButton KanjiName;
    JButton EncodedKanjiName;

    /**
     * Builds the three buttons, loads their icons and lays them out
     * NORTH / CENTER / SOUTH in the content pane.
     */
    public ApacheNonAsciiJavaPlugin1_4()
    {
        ApacheName = new JButton();
        KanjiName = new JButton();
        EncodedKanjiName = new JButton();

        // 1) URL exactly as a pre-encoded (%HH) UTF-8 path.
        ApacheName.setIcon(loadIcon("URL1", "http://apachehost:8080/icons/%e6%bc%a2%e5%ad%97.gif"));
        ApacheName.setText("Using %e6%bc%a2%e5%ad%97");
        getContentPane().add(ApacheName, java.awt.BorderLayout.NORTH);

        // 2) Raw, un-encoded kanji in the URL.
        KanjiName.setIcon(loadIcon("URL2", "http://apachehost:8080/icons/漢字.gif"));
        KanjiName.setText("Using 漢字");
        getContentPane().add(KanjiName, java.awt.BorderLayout.CENTER);

        // 3) Only the file name is run through URLEncoder.
        EncodedKanjiName.setIcon(loadEncodedIcon("漢字.gif"));
        try
        {
            EncodedKanjiName.setText("Using 漢字 encoded to " + java.net.URLEncoder.encode("漢字", "UTF-8"));
        }
        catch (Exception e)
        {
            // Previously swallowed silently, leaving an unexplained blank caption.
            // Report it the same way the icon loaders do.
            e.printStackTrace();
        }
        getContentPane().add(EncodedKanjiName, java.awt.BorderLayout.SOUTH);
    }

    /**
     * Creates an icon from the given URL text, logging the parsed URL first.
     *
     * @param label prefix used in the log line (for example "URL1")
     * @param spec  the URL text to parse and load
     * @return the icon, or <code>null</code> if anything went wrong (the
     *         stack trace is printed)
     */
    private static Icon loadIcon(String label, String spec)
    {
        try {
            java.net.URL url = new java.net.URL(spec);
            System.out.println(label + " = " + url);
            return new javax.swing.ImageIcon(url);
        } catch (Exception e) {
            // Best effort: a failed download must not prevent the applet from showing.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Encodes <code>fileName</code> with the Java 1.4
     * <code>URLEncoder.encode(String, String)</code> using UTF-8, appends it
     * to the icon directory URL and loads the result.
     * Only the file name is encoded because URLEncoder would also escape the
     * "://" and "/" of a complete URL.
     *
     * @param fileName the un-encoded file name inside the icons directory
     * @return the icon, or <code>null</code> if anything went wrong (the
     *         stack trace is printed)
     */
    private static Icon loadEncodedIcon(String fileName)
    {
        try {
            String encoded = java.net.URLEncoder.encode(fileName, "UTF-8");
            System.out.println("URL3 = " + fileName);
            System.out.println("URL3 Encoded 1.4= " + encoded);
            java.net.URL url = new java.net.URL("http://apachehost:8080/icons/" + encoded);
            return new javax.swing.ImageIcon(url);
        } catch (Exception e) {
            // Best effort: a failed download must not prevent the applet from showing.
            e.printStackTrace();
        }
        return null;
    }

}


/***********                Java 1.3            ***************/
import javax.swing.*;

/**
 * Test applet for the Java 1.3 plug-in. It requests one non-ASCII-named image
 * in three ways and puts each result on a button: a pre-encoded URL (top),
 * a URL with raw kanji (centre) and a URL escaped by
 * <code>URLUTF8Encoder.encode</code> (bottom).
 * A button whose image could not be loaded gets a <code>null</code> icon.
 */
public class ApacheNonAsciiJavaPlugin1_3 extends JApplet
{
    JButton ApacheName = new JButton();
    JButton KanjiName = new JButton();
    JButton EncodedKanjiName = new JButton();

    /** Loads the three icons and places the buttons NORTH, CENTER and SOUTH. */
    public ApacheNonAsciiJavaPlugin1_3()
    {
        java.awt.Container pane;

        Icon preEncoded = fetchIcon("URL1", "http://apachehost:8080/icons/%e6%bc%a2%e5%ad%97.gif");
        ApacheName.setIcon(preEncoded);
        ApacheName.setText("Using %e6%bc%a2%e5%ad%97");
        pane = getContentPane();
        pane.add(ApacheName, java.awt.BorderLayout.NORTH);

        Icon rawKanji = fetchIcon("URL2", "http://apachehost:8080/icons/漢字.gif");
        KanjiName.setIcon(rawKanji);
        KanjiName.setText("Using 漢字");
        pane = getContentPane();
        pane.add(KanjiName, java.awt.BorderLayout.CENTER);

        Icon escaped = fetchEscapedIcon();
        EncodedKanjiName.setIcon(escaped);
        EncodedKanjiName.setText("Using 漢字 encoded to "+URLUTF8Encoder.encode("漢字"));
        pane = getContentPane();
        pane.add(EncodedKanjiName, java.awt.BorderLayout.SOUTH);
    }

    /**
     * Parses <code>address</code>, logs it as "<code>tag</code> = url" and
     * wraps it in an ImageIcon.
     *
     * @param tag     log prefix identifying which attempt this is
     * @param address URL text to load the image from
     * @return the icon, or <code>null</code> after printing the stack trace
     *         when an exception occurs
     */
    private static Icon fetchIcon(String tag, String address)
    {
        Icon result = null;
        try {
            java.net.URL target = new java.net.URL(address);
            System.out.println(tag + " = " + target);
            result = new javax.swing.ImageIcon(target);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Escapes the complete kanji URL with <code>URLUTF8Encoder.encode</code>,
     * logs both the parsed URL and the escaped text, and loads the image.
     *
     * @return the icon, or <code>null</code> after printing the stack trace
     *         when an exception occurs
     */
    private static Icon fetchEscapedIcon()
    {
        Icon result = null;
        try {
            String plain = "http://apachehost:8080/icons/漢字.gif";
            String escapedText = URLUTF8Encoder.encode(plain);
            java.net.URL target = new java.net.URL(escapedText);
            System.out.println("URL3 = " + target);
            System.out.println("URL3 Encoded = " + escapedText);
            result = new javax.swing.ImageIcon(target);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

}

/*******************       URLUTF8Encoder for java 1.3                 **********************/
/**
 * Provides a method to encode any string into a URL-safe
 * form.
 * Non-ASCII characters are first encoded as sequences of
 * two, three or four bytes, using the UTF-8 algorithm, before being
 * encoded as %HH escapes.
 */
public class URLUTF8Encoder
{

  /** Lower-case hexadecimal digits used to build the escape table. */
  private static final String HEX_DIGITS = "0123456789abcdef";

  /** Lookup table: <code>hex[b]</code> is the "%xy" escape of byte value b (0..255). */
  final static String[] hex = new String[256];

  static {
    for (int b = 0; b < hex.length; b++) {
      hex[b] = "%" + HEX_DIGITS.charAt(b >> 4) + HEX_DIGITS.charAt(b & 0x0f);
    }
  }

  /**
   * Encode a string to the "x-www-form-urlencoded" form, enhanced
   * with the UTF-8-in-URL proposal. This is what happens:
   *
   * <ul>
   * <li><p>The ASCII characters 'a' through 'z', 'A' through 'Z',
   *        and '0' through '9' remain the same.
   *
   * <li><p>The unreserved characters - _ . ! ~ * ' ( ) remain the same.
   *
   * <li><p>The characters '/' and ':' also remain the same, so that a
   *        complete URL such as "http://host:8080/dir/file" can be passed in
   *        without its scheme, port and path separators being escaped.
   *
   * <li><p>The space character ' ' is converted into a plus sign '+'.
   *        NOTE(review): inside a URL path a '+' is usually not read back
   *        as a space; confirm this is what callers want.
   *
   * <li><p>All other ASCII characters (including '?', '#', '&amp;', '=' and
   *        '%') are converted into the 3-character string "%xy", where xy is
   *        the two-digit lower-case hexadecimal representation of the
   *        character code.
   *
   * <li><p>All non-ASCII characters are encoded in two steps: first
   *        to a sequence of 2, 3 or 4 bytes, using the UTF-8 algorithm;
   *        secondly each of these bytes is encoded as "%xx".
   *        A valid surrogate pair is combined into one code point and
   *        produces a single 4-byte sequence. An unpaired surrogate is
   *        encoded on its own as 3 bytes.
   * </ul>
   *
   * @param s The string to be encoded; must not be null
   * @return The encoded string
   * @throws NullPointerException if <code>s</code> is null
   */
  public static String encode(String s)
  {
    StringBuffer sbuf = new StringBuffer();
    int len = s.length();
    for (int i = 0; i < len; i++) {
      int ch = s.charAt(i);
      if (isPassedThrough(ch)) {
        sbuf.append((char) ch);
      } else if (ch == ' ') {                 // space
        sbuf.append('+');
      } else if (ch <= 0x007f) {              // other ASCII
        sbuf.append(hex[ch]);
      } else if (ch <= 0x07ff) {              // 2-byte UTF-8
        sbuf.append(hex[0xc0 | (ch >> 6)]);
        sbuf.append(hex[0x80 | (ch & 0x3f)]);
      } else if (isHighSurrogate(ch) && i + 1 < len && isLowSurrogate(s.charAt(i + 1))) {
        // Supplementary character: merge the pair, emit 4-byte UTF-8.
        int low = s.charAt(++i);
        int cp = 0x10000 + ((ch - 0xd800) << 10) + (low - 0xdc00);
        sbuf.append(hex[0xf0 | (cp >> 18)]);
        sbuf.append(hex[0x80 | ((cp >> 12) & 0x3f)]);
        sbuf.append(hex[0x80 | ((cp >> 6) & 0x3f)]);
        sbuf.append(hex[0x80 | (cp & 0x3f)]);
      } else {                                // 3-byte UTF-8 (0x0800..0xffff)
        sbuf.append(hex[0xe0 | (ch >> 12)]);
        sbuf.append(hex[0x80 | ((ch >> 6) & 0x3f)]);
        sbuf.append(hex[0x80 | (ch & 0x3f)]);
      }
    }
    return sbuf.toString();
  }

  /** Returns true for characters that are copied to the output unchanged. */
  private static boolean isPassedThrough(int ch)
  {
    if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9')) {
      return true;
    }
    switch (ch) {
      case '-': case '_': case '.': case '!': case '~':
      case '*': case '\'': case '(': case ')':   // unreserved marks
      case '/': case ':':                        // URL structure, kept for full URLs
        return true;
      default:
        return false;
    }
  }

  /** Returns true if ch is the first (high) half of a UTF-16 surrogate pair. */
  private static boolean isHighSurrogate(int ch)
  {
    return 0xd800 <= ch && ch <= 0xdbff;
  }

  /** Returns true if ch is the second (low) half of a UTF-16 surrogate pair. */
  private static boolean isLowSurrogate(int ch)
  {
    return 0xdc00 <= ch && ch <= 0xdfff;
  }

}


I hope this helps someone.

Anthony




Jeff Cohen wrote:

  Hi Anthony,

  I'm still looking for a solution for both of us I guess, I just started
  building a web based photo album and I need that Hebrew support, when I'll
  get the answer for it, I'll let you know.

  Jeff Cohen

  > -----Original Message-----
  > From: Anthony Cunningham [mailto:anthony.cb@om.asahi-kasei.co.jp]
  > Sent: Thursday, January 09, 2003 8:55 PM
  > To: users@httpd.apache.org
  > Subject: Re: [users@httpd] Apache returns 404 not found for non ascii
  > filenames with Java Plugin.
  >
  > Thanks Jeff,
  >
  > I downloaded and tried  1.3.27 on win32 (the latest 1.3 version I believe)
  > yesterday but now the URL images/JA_NAME.jpg becomes
  > \xb6\xc0\xb6\xc5/\xb6\xc0\x83J\x83i.jpg in the access log with a 403
  > and the url images/%e6%bc%a2%e5%ad%97%e5%90%8d.jpg have code 404.
  >
  > Anthony
  >
  >
  > Jeff Cohen wrote:
  >
  > > Hi Anthony,
  > >
  > > As far as I know (from same problem in Hebrew) this feature is not
  > supported
  > > in the 2.x versions, I didn't have that problem previously when I had
  > > 1.3.2x.
  > > If you find something that might make it to work I'll be very happy if
  > > you'll share it with me.
  > >
  > > All the best,
  > > Jeff Cohen
  > >
  > > > -----Original Message-----
  > > > From: Anthony Cunningham [mailto:anthony.cb@om.asahi-kasei.co.jp]
  > > > Sent: Thursday, January 09, 2003 4:10 AM
  > > > Subject: [users@httpd] Apache returns 404 not found for non ascii
  > > > filenames with Java Plugin.
  > > >
  > > > I searched the archives and the web for this problem but was unable to
  > > > find a clear solution.
  > > > Basically the problem is as follows:
  > > >
  > > > In our java applet it downloads images from the web server.  Recently
  > we
  > > > have been experimenting with using Apache 2 on windows instead of IIS.
  > > > A lot of our images have japanese character filenames and/or japanese
  > > > character directory paths. ie. non-ascii.
  > > > Apache will return a 404 not found for these.
  > > >
  > > > for example: (I can't write japanese with this mail client so I will
  > use
  > > > capital letters to indicate japanese characters)
  > > > if the file is images/JA_NAME.jpg
  > > > then Apache returns 404 not found.
  > > > In the access log Apache shows a request for images/JA_NAME.jpg and a
  > > > 404.  This file exists exactly as Apache wrote in the log.
  > > > If you enter images/ in the browser it lists all the files, including
  > > > images/JA_NAME.jpg.
  > > > Click on this and the browser will load
  > > > images/%e6%bc%a2%e5%ad%97%e5%90%8d.jpg and you can see the image.
  > > > In the access log it shows images/%e6%bc%a2%e5%ad%97%e5%90%8d.jpg and
  > > > 200 which I assume means success.
  > > >
  > > > A few other points
  > > > In Internet Explorer 5.5 entering either images/JA_NAME.jpg or
  > > > images/%e6%bc%a2%e5%ad%97%e5%90%8d.jpg succeeds so maybe IE converts
  > > > before requesting.
  > > > In Netscape(R) Communicator 4.7 Japanese version only
  > > > images/%e6%bc%a2%e5%ad%97%e5%90%8d.jpg will work.
  > > > If java is told explicity to load
  > images/%e6%bc%a2%e5%ad%97%e5%90%8d.jpg
  > > > it will work.
  > > >
  > > > My boss read on the internet about a module called mod_encoding for
  > > > non-ascii files however this seems to be for linux only.  I'm not sure
  > > > if there is a version for windows yet.  Does anyone have any
  > information
  > > > about this.
  > > >
  > > > It may seem to be a problem with java and URL encoding and could well
  > be
  > > > however the fact that it works fine with IIS makes me wonder if there
  > is
  > > > a way to get it working on Apache Win32.
  > > >
  > > > I would appreciate any help or experience anyone can give.
  > > >
  > > > Anthony
  > > >
  > > > Details:
  > > > Apache/2.0.43 Win32
  > > > Windows NT (Japanese ) and Windows 2000 (Japanese)
  > > > Java Plugin 1.4 and 1.3
  > > >
  > > > For future searchers of this and similar topics:
  > > > unicode filename
  > > > doublebyte filename
  > > > multibyte filename
  > > > japanesee filename
  > > > non-ascii files
  > > >
  > > >
  > > >
  > > >
  > > >
  > > >
  > > > ---------------------------------------------------------------------
  > > > The official User-To-User support forum of the Apache HTTP Server
  > Project.
  > > > See <URL:http://httpd.apache.org/userslist.html> for more info.
  > > > To unsubscribe, e-mail: users-unsubscribe@httpd.apache.org
  > > >    "   from the digest: users-digest-unsubscribe@httpd.apache.org
  > > > For additional commands, e-mail: users-help@httpd.apache.org
  > >
  > > ---------------------------------------------------------------------
  > > The official User-To-User support forum of the Apache HTTP Server
  > Project.
  > > See <URL:http://httpd.apache.org/userslist.html> for more info.
  > > To unsubscribe, e-mail: users-unsubscribe@httpd.apache.org
  > >    "   from the digest: users-digest-unsubscribe@httpd.apache.org
  > > For additional commands, e-mail: users-help@httpd.apache.org
  >
  >
  > ---------------------------------------------------------------------
  > The official User-To-User support forum of the Apache HTTP Server Project.
  > See <URL:http://httpd.apache.org/userslist.html> for more info.
  > To unsubscribe, e-mail: users-unsubscribe@httpd.apache.org
  >    "   from the digest: users-digest-unsubscribe@httpd.apache.org
  > For additional commands, e-mail: users-help@httpd.apache.org

  ---------------------------------------------------------------------
  The official User-To-User support forum of the Apache HTTP Server Project.
  See <URL:http://httpd.apache.org/userslist.html> for more info.
  To unsubscribe, e-mail: users-unsubscribe@httpd.apache.org
     "   from the digest: users-digest-unsubscribe@httpd.apache.org
  For additional commands, e-mail: users-help@httpd.apache.org



---------------------------------------------------------------------
The official User-To-User support forum of the Apache HTTP Server Project.
See <URL:http://httpd.apache.org/userslist.html> for more info.
To unsubscribe, e-mail: users-unsubscribe@httpd.apache.org
   "   from the digest: users-digest-unsubscribe@httpd.apache.org
For additional commands, e-mail: users-help@httpd.apache.org


Mime
View raw message