Return-Path: X-Original-To: apmail-xmlgraphics-fop-commits-archive@www.apache.org Delivered-To: apmail-xmlgraphics-fop-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 70EBD9222 for ; Sun, 26 Feb 2012 02:30:57 +0000 (UTC) Received: (qmail 92182 invoked by uid 500); 26 Feb 2012 02:30:57 -0000 Delivered-To: apmail-xmlgraphics-fop-commits-archive@xmlgraphics.apache.org Received: (qmail 92150 invoked by uid 500); 26 Feb 2012 02:30:57 -0000 Mailing-List: contact fop-commits-help@xmlgraphics.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: fop-dev@xmlgraphics.apache.org Delivered-To: mailing list fop-commits@xmlgraphics.apache.org Received: (qmail 92085 invoked by uid 99); 26 Feb 2012 02:30:55 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 26 Feb 2012 02:30:55 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 26 Feb 2012 02:30:41 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id B2BFE2388B71; Sun, 26 Feb 2012 02:29:45 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1293736 [12/38] - in /xmlgraphics/fop/trunk: ./ src/codegen/java/org/apache/fop/tools/ src/codegen/unicode/java/org/apache/fop/complexscripts/ src/codegen/unicode/java/org/apache/fop/complexscripts/bidi/ src/documentation/content/xdocs/tru... 
Date: Sun, 26 Feb 2012 02:29:29 -0000 To: fop-commits@xmlgraphics.apache.org From: gadams@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120226022945.B2BFE2388B71@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Positionable.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Positionable.java?rev=1293736&view=auto ============================================================================== --- xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Positionable.java (added) +++ xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Positionable.java Sun Feb 26 02:29:01 2012 @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.fonts; + +// CSOFF: LineLengthCheck + +/** + * Optional interface which indicates that glyph positioning is supported and, if supported, + * can perform positioning. + * @author Glenn Adams + */ +public interface Positionable { + + /** + * Determines if font performs glyph positioning. 
+ * @return true if performs positioning + */ + boolean performsPositioning(); + + /** + * Perform glyph positioning. + * @param cs character sequence to map to position offsets (advancement adjustments) + * @param script a script identifier + * @param language a language identifier + * @param fontSize font size + * @return array (sequence) of 4-tuples of placement [PX,PY] and advance [AX,AY] adjustments, in that order, + * with one 4-tuple for each element of glyph sequence, or null if no non-zero adjustment applies + */ + int[][] performPositioning ( CharSequence cs, String script, String language, int fontSize ); + + /** + * Perform glyph positioning using an implied font size. + * @param cs character sequence to map to position offsets (advancement adjustments) + * @param script a script identifier + * @param language a language identifier + * @return array (sequence) of 4-tuples of placement [PX,PY] and advance [AX,AY] adjustments, in that order, + * with one 4-tuple for each element of glyph sequence, or null if no non-zero adjustment applies + */ + int[][] performPositioning ( CharSequence cs, String script, String language ); + +} Added: xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Substitutable.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Substitutable.java?rev=1293736&view=auto ============================================================================== --- xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Substitutable.java (added) +++ xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/fonts/Substitutable.java Sun Feb 26 02:29:01 2012 @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.fonts; + +// CSOFF: LineLengthCheck + +/** + * Optional interface which indicates that glyph substitution is supported and, if supported, + * can perform substitution. + * @author Glenn Adams + */ +public interface Substitutable { + + /** + * Determines if font performs glyph substitution. + * @return true if performs substitution. + */ + boolean performsSubstitution(); + + /** + * Perform substitutions on characters to effect glyph substitution. If some substitution is performed, it + * entails mapping from one or more input characters denoting textual character information to one or more + * output character codes denoting glyphs in this font, where the output character codes may make use of + * private character code values that have significance only for this font. 
+ * @param cs character sequence to map to output font encoding character sequence + * @param script a script identifier + * @param language a language identifier + * @return output sequence (represented as a character sequence, where each character in the returned sequence + * denotes "font characters", i.e., character codes that map directly (1-1) to their associated glyphs) + */ + CharSequence performSubstitution ( CharSequence cs, String script, String language ); + + /** + * Reorder combining marks in character sequence so that they precede (within the sequence) the base + * character to which they are applied. N.B. In the case of LTR segments, marks are not reordered by this + * method, since when the segment is reversed by BIDI processing, marks are automatically reordered to precede + * their base character. + * @param cs character sequence within which combining marks are to be reordered + * @param gpa associated glyph position adjustments (also reordered) + * @param script a script identifier + * @param language a language identifier + * @return output sequence containing reordered "font characters" + */ + CharSequence reorderCombiningMarks ( CharSequence cs, int[][] gpa, String script, String language ); + +} Added: xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/ArabicScriptProcessor.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/ArabicScriptProcessor.java?rev=1293736&view=auto ============================================================================== --- xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/ArabicScriptProcessor.java (added) +++ xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/ArabicScriptProcessor.java Sun Feb 26 02:29:01 2012 @@ -0,0 +1,522 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.scripts; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.fop.complexscripts.bidi.BidiClass; +import org.apache.fop.complexscripts.bidi.BidiConstants; +import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable; +import org.apache.fop.complexscripts.util.GlyphContextTester; +import org.apache.fop.complexscripts.util.GlyphSequence; +import org.apache.fop.complexscripts.util.ScriptContextTester; + +// CSOFF: AvoidNestedBlocksCheck +// CSOFF: NoWhitespaceAfterCheck +// CSOFF: InnerAssignmentCheck +// CSOFF: SimplifyBooleanReturnCheck +// CSOFF: LineLengthCheck + +/** + *

The ArabicScriptProcessor class implements a script processor for + * performing glyph substitution and positioning operations on content associated with the Arabic script.

+ * @author Glenn Adams + */ +public class ArabicScriptProcessor extends DefaultScriptProcessor { + + /** logging instance */ + private static final Log log = LogFactory.getLog(ArabicScriptProcessor.class); // CSOK: ConstantNameCheck + + /** features to use for substitutions */ + private static final String[] gsubFeatures = // CSOK: ConstantNameCheck + { + "calt", // contextual alternates + "ccmp", // glyph composition/decomposition + "fina", // final (terminal) forms + "init", // initial forms + "isol", // isolated forms + "liga", // standard ligatures + "medi", // medial forms + "rlig" // required ligatures + }; + + /** features to use for positioning */ + private static final String[] gposFeatures = // CSOK: ConstantNameCheck + { + "curs", // cursive positioning + "kern", // kerning + "mark", // mark to base or ligature positioning + "mkmk" // mark to mark positioning + }; + + private static class SubstitutionScriptContextTester implements ScriptContextTester { + private static Map/**/ testerMap = new HashMap/**/(); + static { + testerMap.put ( "fina", new GlyphContextTester() { + public boolean test ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + return inFinalContext ( script, language, feature, gs, index, flags ); + } + } ); + testerMap.put ( "init", new GlyphContextTester() { + public boolean test ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + return inInitialContext ( script, language, feature, gs, index, flags ); + } + } ); + testerMap.put ( "isol", new GlyphContextTester() { + public boolean test ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + return inIsolateContext ( script, language, feature, gs, index, flags ); + } + } ); + testerMap.put ( "liga", new GlyphContextTester() { + public boolean test ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + return 
inLigatureContext ( script, language, feature, gs, index, flags ); + } + } ); + testerMap.put ( "medi", new GlyphContextTester() { + public boolean test ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + return inMedialContext ( script, language, feature, gs, index, flags ); + } + } ); + } + public GlyphContextTester getTester ( String feature ) { + return (GlyphContextTester) testerMap.get ( feature ); + } + } + + private static class PositioningScriptContextTester implements ScriptContextTester { + private static Map/**/ testerMap = new HashMap/**/(); + public GlyphContextTester getTester ( String feature ) { + return (GlyphContextTester) testerMap.get ( feature ); + } + } + + private final ScriptContextTester subContextTester; + private final ScriptContextTester posContextTester; + + ArabicScriptProcessor ( String script ) { + super ( script ); + this.subContextTester = new SubstitutionScriptContextTester(); + this.posContextTester = new PositioningScriptContextTester(); + } + + /** {@inheritDoc} */ + public String[] getSubstitutionFeatures() { + return gsubFeatures; + } + + /** {@inheritDoc} */ + public ScriptContextTester getSubstitutionContextTester() { + return subContextTester; + } + + /** {@inheritDoc} */ + public String[] getPositioningFeatures() { + return gposFeatures; + } + + /** {@inheritDoc} */ + public ScriptContextTester getPositioningContextTester() { + return posContextTester; + } + + /** {@inheritDoc} */ + @Override + public GlyphSequence reorderCombiningMarks ( GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language ) { + // a side effect of BIDI reordering is to order combining marks before their base, so we need to override the default here to + // prevent double reordering + return gs; + } + + private static boolean inFinalContext ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + GlyphSequence.CharAssociation a = 
gs.getAssociation ( index ); + int[] ca = gs.getCharacterArray ( false ); + int nc = gs.getCharacterCount(); + if ( nc == 0 ) { + return false; + } else { + int s = a.getStart(); + int e = a.getEnd(); + if ( ! hasFinalPrecedingContext ( ca, nc, s, e ) ) { + return false; + } else if ( forcesFinalThisContext ( ca, nc, s, e ) ) { + return true; + } else if ( ! hasFinalFollowingContext ( ca, nc, s, e ) ) { + return false; + } else { + return true; + } + } + } + + private static boolean inInitialContext ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( index ); + int[] ca = gs.getCharacterArray ( false ); + int nc = gs.getCharacterCount(); + if ( nc == 0 ) { + return false; + } else { + int s = a.getStart(); + int e = a.getEnd(); + if ( ! hasInitialPrecedingContext ( ca, nc, s, e ) ) { + return false; + } else if ( ! hasInitialFollowingContext ( ca, nc, s, e ) ) { + return false; + } else { + return true; + } + } + } + + private static boolean inIsolateContext ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( index ); + int nc = gs.getCharacterCount(); + if ( nc == 0 ) { + return false; + } else if ( ( a.getStart() == 0 ) && ( a.getEnd() == nc ) ) { + return true; + } else { + return false; + } + } + + private static boolean inLigatureContext ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( index ); + int[] ca = gs.getCharacterArray ( false ); + int nc = gs.getCharacterCount(); + if ( nc == 0 ) { + return false; + } else { + int s = a.getStart(); + int e = a.getEnd(); + if ( ! hasLigaturePrecedingContext ( ca, nc, s, e ) ) { + return false; + } else if ( ! 
hasLigatureFollowingContext ( ca, nc, s, e ) ) { + return false; + } else { + return true; + } + } + } + + private static boolean inMedialContext ( String script, String language, String feature, GlyphSequence gs, int index, int flags ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( index ); + int[] ca = gs.getCharacterArray ( false ); + int nc = gs.getCharacterCount(); + if ( nc == 0 ) { + return false; + } else { + int s = a.getStart(); + int e = a.getEnd(); + if ( ! hasMedialPrecedingContext ( ca, nc, s, e ) ) { + return false; + } else if ( ! hasMedialThisContext ( ca, nc, s, e ) ) { + return false; + } else if ( ! hasMedialFollowingContext ( ca, nc, s, e ) ) { + return false; + } else { + return true; + } + } + } + + private static boolean hasFinalPrecedingContext ( int[] ca, int nc, int s, int e ) { + int chp = 0; + int clp = 0; + for ( int i = s; i > 0; i-- ) { + int k = i - 1; + if ( ( k >= 0 ) && ( k < nc ) ) { + chp = ca [ k ]; + clp = BidiClass.getBidiClass ( chp ); + if ( clp != BidiConstants.NSM ) { + break; + } + } + } + if ( clp != BidiConstants.AL ) { + return false; + } else if ( hasIsolateInitial ( chp ) ) { + return false; + } else { + return true; + } + } + + private static boolean forcesFinalThisContext ( int[] ca, int nc, int s, int e ) { + int chl = 0; + int cll = 0; + for ( int i = 0, n = e - s; i < n; i++ ) { + int k = n - i - 1; + int j = s + k; + if ( ( j >= 0 ) && ( j < nc ) ) { + chl = ca [ j ]; + cll = BidiClass.getBidiClass ( chl ); + if ( cll != BidiConstants.NSM ) { + break; + } + } + } + if ( cll != BidiConstants.AL ) { + return false; + } + if ( hasIsolateInitial ( chl ) ) { + return true; + } else { + return false; + } + } + + private static boolean hasFinalFollowingContext ( int[] ca, int nc, int s, int e ) { + int chf = 0; + int clf = 0; + for ( int i = e, n = nc; i < n; i++ ) { + chf = ca [ i ]; + clf = BidiClass.getBidiClass ( chf ); + if ( clf != BidiConstants.NSM ) { + break; + } + } + if ( clf != 
BidiConstants.AL ) { + return true; + } else if ( hasIsolateFinal ( chf ) ) { + return true; + } else { + return false; + } + } + + private static boolean hasInitialPrecedingContext ( int[] ca, int nc, int s, int e ) { + int chp = 0; + int clp = 0; + for ( int i = s; i > 0; i-- ) { + int k = i - 1; + if ( ( k >= 0 ) && ( k < nc ) ) { + chp = ca [ k ]; + clp = BidiClass.getBidiClass ( chp ); + if ( clp != BidiConstants.NSM ) { + break; + } + } + } + if ( clp != BidiConstants.AL ) { + return true; + } else if ( hasIsolateInitial ( chp ) ) { + return true; + } else { + return false; + } + } + + private static boolean hasInitialFollowingContext ( int[] ca, int nc, int s, int e ) { + int chf = 0; + int clf = 0; + for ( int i = e, n = nc; i < n; i++ ) { + chf = ca [ i ]; + clf = BidiClass.getBidiClass ( chf ); + if ( clf != BidiConstants.NSM ) { + break; + } + } + if ( clf != BidiConstants.AL ) { + return false; + } else if ( hasIsolateFinal ( chf ) ) { + return false; + } else { + return true; + } + } + + private static boolean hasMedialPrecedingContext ( int[] ca, int nc, int s, int e ) { + int chp = 0; + int clp = 0; + for ( int i = s; i > 0; i-- ) { + int k = i - 1; + if ( ( k >= 0 ) && ( k < nc ) ) { + chp = ca [ k ]; + clp = BidiClass.getBidiClass ( chp ); + if ( clp != BidiConstants.NSM ) { + break; + } + } + } + if ( clp != BidiConstants.AL ) { + return false; + } else if ( hasIsolateInitial ( chp ) ) { + return false; + } else { + return true; + } + } + + private static boolean hasMedialThisContext ( int[] ca, int nc, int s, int e ) { + int chf = 0; // first non-NSM char in [s,e) + int clf = 0; + for ( int i = 0, n = e - s; i < n; i++ ) { + int k = s + i; + if ( ( k >= 0 ) && ( k < nc ) ) { + chf = ca [ s + i ]; + clf = BidiClass.getBidiClass ( chf ); + if ( clf != BidiConstants.NSM ) { + break; + } + } + } + if ( clf != BidiConstants.AL ) { + return false; + } + int chl = 0; // last non-NSM char in [s,e) + int cll = 0; + for ( int i = 0, n = e - s; i < n; i++ ) 
{ + int k = n - i - 1; + int j = s + k; + if ( ( j >= 0 ) && ( j < nc ) ) { + chl = ca [ j ]; + cll = BidiClass.getBidiClass ( chl ); + if ( cll != BidiConstants.NSM ) { + break; + } + } + } + if ( cll != BidiConstants.AL ) { + return false; + } + if ( hasIsolateFinal ( chf ) ) { + return false; + } else if ( hasIsolateInitial ( chl ) ) { + return false; + } else { + return true; + } + } + + private static boolean hasMedialFollowingContext ( int[] ca, int nc, int s, int e ) { + int chf = 0; + int clf = 0; + for ( int i = e, n = nc; i < n; i++ ) { + chf = ca [ i ]; + clf = BidiClass.getBidiClass ( chf ); + if ( clf != BidiConstants.NSM ) { + break; + } + } + if ( clf != BidiConstants.AL ) { + return false; + } else if ( hasIsolateFinal ( chf ) ) { + return false; + } else { + return true; + } + } + + private static boolean hasLigaturePrecedingContext ( int[] ca, int nc, int s, int e ) { + return true; + } + + private static boolean hasLigatureFollowingContext ( int[] ca, int nc, int s, int e ) { + int chf = 0; + int clf = 0; + for ( int i = e, n = nc; i < n; i++ ) { + chf = ca [ i ]; + clf = BidiClass.getBidiClass ( chf ); + if ( clf != BidiConstants.NSM ) { + break; + } + } + if ( clf == BidiConstants.AL ) { + return true; + } else { + return false; + } + } + + /** + * Ordered array of Unicode scalars designating those Arabic (Script) Letters + * which exhibit an isolated form in word initial position. 
+ */ + private static int[] isolatedInitials = { + 0x0621, // HAMZA + 0x0622, // ALEF WITH MADDA ABOVE + 0x0623, // ALEF WITH HAMZA ABOVE + 0x0624, // WAW WITH HAMZA ABOVE + 0x0625, // ALEF WITH HAMZA BELOW + 0x0627, // ALEF + 0x062F, // DAL + 0x0630, // THAL + 0x0631, // REH + 0x0632, // ZAIN + 0x0648, // WAW + 0x0671, // ALEF WASLA + 0x0672, // ALEF WITH WAVY HAMZA ABOVE + 0x0673, // ALEF WITH WAVY HAMZA BELOW + 0x0675, // HIGH HAMZA ALEF + 0x0676, // HIGH HAMZA WAW + 0x0677, // U WITH HAMZA ABOVE + 0x0688, // DDAL + 0x0689, // DAL WITH RING + 0x068A, // DAL WITH DOT BELOW + 0x068B, // DAL WITH DOT BELOW AND SMALL TAH + 0x068C, // DAHAL + 0x068D, // DDAHAL + 0x068E, // DUL + 0x068F, // DAL WITH THREE DOTS ABOVE DOWNWARDS + 0x0690, // DAL WITH FOUR DOTS ABOVE + 0x0691, // RREH + 0x0692, // REH WITH SMALL V + 0x0693, // REH WITH RING + 0x0694, // REH WITH DOT BELOW + 0x0695, // REH WITH SMALL V BELOW + 0x0696, // REH WITH DOT BELOW AND DOT ABOVE + 0x0697, // REH WITH TWO DOTS ABOVE + 0x0698, // JEH + 0x0699, // REH WITH FOUR DOTS ABOVE + 0x06C4, // WAW WITH RING + 0x06C5, // KIRGHIZ OE + 0x06C6, // OE + 0x06C7, // U + 0x06C8, // YU + 0x06C9, // KIRGHIZ YU + 0x06CA, // WAW WITH TWO DOTS ABOVE + 0x06CB, // VE + 0x06CF, // WAW WITH DOT ABOVE + 0x06EE, // DAL WITH INVERTED V + 0x06EF // REH WITH INVERTED V + }; + + private static boolean hasIsolateInitial ( int ch ) { + return Arrays.binarySearch ( isolatedInitials, ch ) >= 0; + } + + /** + * Ordered array of Unicode scalars designating those Arabic (Script) Letters + * which exhibit an isolated form in word final position. 
+ */ + private static int[] isolatedFinals = { + 0x0621 // HAMZA + }; + + private static boolean hasIsolateFinal ( int ch ) { + return Arrays.binarySearch ( isolatedFinals, ch ) >= 0; + } + +} Added: xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DefaultScriptProcessor.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DefaultScriptProcessor.java?rev=1293736&view=auto ============================================================================== --- xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DefaultScriptProcessor.java (added) +++ xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DefaultScriptProcessor.java Sun Feb 26 02:29:01 2012 @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.scripts; + +import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable; +import org.apache.fop.complexscripts.util.GlyphSequence; +import org.apache.fop.complexscripts.util.ScriptContextTester; + +// CSOFF: LineLengthCheck + +/** + * Default script processor, which enables default glyph composition/decomposition, common ligatures, localized forms + * and kerning. + * + * @author Glenn Adams + */ +public class DefaultScriptProcessor extends ScriptProcessor { + + /** features to use for substitutions */ + private static final String[] gsubFeatures = // CSOK: ConstantNameCheck + { + "ccmp", // glyph composition/decomposition + "liga", // common ligatures + "locl" // localized forms + }; + + /** features to use for positioning */ + private static final String[] gposFeatures = // CSOK: ConstantNameCheck + { + "kern", // kerning + "mark", // mark to base or ligature positioning + "mkmk" // mark to mark positioning + }; + + DefaultScriptProcessor ( String script ) { + super ( script ); + } + + @Override + /** {@inheritDoc} */ + public String[] getSubstitutionFeatures() { + return gsubFeatures; + } + + @Override + /** {@inheritDoc} */ + public ScriptContextTester getSubstitutionContextTester() { + return null; + } + + @Override + /** {@inheritDoc} */ + public String[] getPositioningFeatures() { + return gposFeatures; + } + + @Override + /** {@inheritDoc} */ + public ScriptContextTester getPositioningContextTester() { + return null; + } + + @Override + /** {@inheritDoc} */ + public GlyphSequence reorderCombiningMarks ( GlyphDefinitionTable gdef, GlyphSequence gs, int[][] gpa, String script, String language ) { + int ng = gs.getGlyphCount(); + int[] ga = gs.getGlyphArray ( false ); + int nm = 0; + // count combining marks + for ( int i = 0; i < ng; i++ ) { + int gid = ga [ i ]; + if ( gdef.isGlyphClass ( gid, GlyphDefinitionTable.GLYPH_CLASS_MARK ) ) { + nm++; + } + } + // only reorder if there is at least one 
mark and at least one non-mark glyph + if ( ( nm > 0 ) && ( ( ng - nm ) > 0 ) ) { + GlyphSequence.CharAssociation[] aa = gs.getAssociations ( 0, -1 ); + int[] nga = new int [ ng ]; + int[][] npa = ( gpa != null ) ? new int [ ng ][] : null; + GlyphSequence.CharAssociation[] naa = new GlyphSequence.CharAssociation [ ng ]; + int k = 0; + GlyphSequence.CharAssociation ba = null; + int bg = -1; + int[] bpa = null; + for ( int i = 0; i < ng; i++ ) { + int gid = ga [ i ]; + int[] pa = ( gpa != null ) ? gpa [ i ] : null; + GlyphSequence.CharAssociation ca = aa [ i ]; + if ( gdef.isGlyphClass ( gid, GlyphDefinitionTable.GLYPH_CLASS_MARK ) ) { + nga [ k ] = gid; naa [ k ] = ca; + if ( npa != null ) { + npa [ k ] = pa; + } + k++; + } else { + if ( bg != -1 ) { + nga [ k ] = bg; naa [ k ] = ba; + if ( npa != null ) { + npa [ k ] = bpa; + } + k++; + bg = -1; ba = null; bpa = null; + } + if ( bg == -1 ) { + bg = gid; ba = ca; bpa = pa; + } + } + } + if ( bg != -1 ) { + nga [ k ] = bg; naa [ k ] = ba; + if ( npa != null ) { + npa [ k ] = bpa; + } + k++; + } + assert k == ng; + if ( npa != null ) { + System.arraycopy ( npa, 0, gpa, 0, ng ); + } + return new GlyphSequence ( gs, null, nga, null, null, naa, null ); + } else { + return gs; + } + } + +} Added: xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DevanagariScriptProcessor.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DevanagariScriptProcessor.java?rev=1293736&view=auto ============================================================================== --- xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DevanagariScriptProcessor.java (added) +++ xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/DevanagariScriptProcessor.java Sun Feb 26 02:29:01 2012 @@ -0,0 +1,537 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.scripts; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.fop.complexscripts.util.GlyphSequence; +import org.apache.fop.complexscripts.util.ScriptContextTester; + +// CSOFF: AvoidNestedBlocksCheck +// CSOFF: NoWhitespaceAfterCheck +// CSOFF: WhitespaceAfter +// CSOFF: InnerAssignmentCheck +// CSOFF: SimplifyBooleanReturnCheck +// CSOFF: LineLengthCheck + +/** + *

The DevanagariScriptProcessor class implements a script processor for + * performing glyph substitution and positioning operations on content associated with the Devanagari script.

+ * @author Glenn Adams + */ +public class DevanagariScriptProcessor extends IndicScriptProcessor { + + /** logging instance */ + private static final Log log = LogFactory.getLog(DevanagariScriptProcessor.class); // CSOK: ConstantNameCheck + + DevanagariScriptProcessor ( String script ) { + super ( script ); + } + + @Override + protected Class getSyllabizerClass() { + return DevanagariSyllabizer.class; + } + + @Override + // find rightmost pre-base matra + protected int findPreBaseMatra ( GlyphSequence gs ) { + int ng = gs.getGlyphCount(); + int lk = -1; + for ( int i = ng; i > 0; i-- ) { + int k = i - 1; + if ( containsPreBaseMatra ( gs, k ) ) { + lk = k; + break; + } + } + return lk; + } + + @Override + // find leftmost pre-base matra target, starting from source + protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) { + int ng = gs.getGlyphCount(); + int lk = -1; + for ( int i = ( source < ng ) ? source : ng; i > 0; i-- ) { + int k = i - 1; + if ( containsConsonant ( gs, k ) ) { + if ( containsHalfConsonant ( gs, k ) ) { + lk = k; + } else if ( lk == -1 ) { + lk = k; + } else { + break; + } + } + } + return lk; + } + + private static boolean containsPreBaseMatra ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + if ( isPreM ( ca [ i ] ) ) { + return true; + } + } + return false; + } + + private static boolean containsConsonant ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + if ( isC ( ca [ i ] ) ) { + return true; + } + } + return false; + } + + private static boolean containsHalfConsonant ( GlyphSequence gs, int k ) { + Boolean half = (Boolean) gs.getAssociation ( k ) . getPredication ( "half" ); + return ( half != null ) ? 
half.booleanValue() : false; + } + + @Override + protected int findReph ( GlyphSequence gs ) { + int ng = gs.getGlyphCount(); + int li = -1; + for ( int i = 0; i < ng; i++ ) { + if ( containsReph ( gs, i ) ) { + li = i; + break; + } + } + return li; + } + + @Override + protected int findRephTarget ( GlyphSequence gs, int source ) { + int ng = gs.getGlyphCount(); + int c1 = -1; + int c2 = -1; + // first candidate target is after first non-half consonant + for ( int i = 0; i < ng; i++ ) { + if ( ( i != source ) && containsConsonant ( gs, i ) ) { + if ( ! containsHalfConsonant ( gs, i ) ) { + c1 = i + 1; + break; + } + } + } + // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark + for ( int i = ( c1 >= 0 ) ? c1 : 0; i < ng; i++ ) { + if ( containsMatra ( gs, i ) && ! containsPreBaseMatra ( gs, i ) ) { + c2 = i + 1; + } else if ( containsOtherMark ( gs, i ) ) { + c2 = i; + break; + } + } + if ( c2 >= 0 ) { + return c2; + } else if ( c1 >= 0 ) { + return c1; + } else { + return source; + } + } + + private static boolean containsReph ( GlyphSequence gs, int k ) { + Boolean rphf = (Boolean) gs.getAssociation ( k ) . getPredication ( "rphf" ); + return ( rphf != null ) ? 
rphf.booleanValue() : false; + } + + private static boolean containsMatra ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + if ( isM ( ca [ i ] ) ) { + return true; + } + } + return false; + } + + private static boolean containsOtherMark ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + switch ( typeOf ( ca [ i ] ) ) { + case C_T: // tone (e.g., udatta, anudatta) + case C_A: // accent (e.g., acute, grave) + case C_O: // other (e.g., candrabindu, anusvara, visarga, etc) + return true; + default: + break; + } + } + return false; + } + + private static class DevanagariSyllabizer extends DefaultSyllabizer { + DevanagariSyllabizer ( String script, String language ) { + super ( script, language ); + } + @Override + // | C ... + protected int findStartOfSyllable ( int[] ca, int s, int e ) { + if ( ( s < 0 ) || ( s >= e ) ) { + return -1; + } else { + while ( s < e ) { + int c = ca [ s ]; + if ( isC ( c ) ) { + break; + } else { + s++; + } + } + return s; + } + } + @Override + // D* L? | ... + protected int findEndOfSyllable ( int[] ca, int s, int e ) { + if ( ( s < 0 ) || ( s >= e ) ) { + return -1; + } else { + int nd = 0; + int nl = 0; + int i; + // consume dead consonants + while ( ( i = isDeadConsonant ( ca, s, e ) ) > s ) { + s = i; nd++; + } + // consume zero or one live consonant + if ( ( i = isLiveConsonant ( ca, s, e ) ) > s ) { + s = i; nl++; + } + return ( ( nd > 0 ) || ( nl > 0 ) ) ? s : -1; + } + } + // D := ( C N? H )? 
+ private int isDeadConsonant ( int[] ca, int s, int e ) { + if ( s < 0 ) { + return -1; + } else { + int c, i = 0; + int nc = 0, nh = 0; + do { + // C + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isC ( c ) ) { + i++; + nc++; + } else { + break; + } + } + // N? + if ( ( s + i ) < e ) { + c = ca [ s + 1 ]; + if ( isN ( c ) ) { + i++; + } + } + // H + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isH ( c ) ) { + i++; + nh++; + } else { + break; + } + } + } while ( false ); + return ( nc > 0 ) && ( nh > 0 ) ? s + i : -1; + } + } + // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK ) + private int isLiveConsonant ( int[] ca, int s, int e ) { + if ( s < 0 ) { + return -1; + } else { + int c, i = 0; + int nc = 0, nv = 0, nx = 0; + do { + // C + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isC ( c ) ) { + i++; + nc++; + } else if ( isV ( c ) ) { + i++; + nv++; + } else { + break; + } + } + // N? + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isN ( c ) ) { + i++; + } + } + // X* + while ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isX ( c ) ) { + i++; + nx++; + } else { + break; + } + } + } while ( false ); + // if no X but has H, then ignore C|I + if ( nx == 0 ) { + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isH ( c ) ) { + if ( nc > 0 ) { + nc--; + } else if ( nv > 0 ) { + nv--; + } + } + } + } + return ( ( nc > 0 ) || ( nv > 0 ) ) ? 
s + i : -1; + } + } + } + + // devanagari character types + static final short C_U = 0; // unassigned + static final short C_C = 1; // consonant + static final short C_V = 2; // vowel + static final short C_M = 3; // vowel sign (matra) + static final short C_S = 4; // symbol or sign + static final short C_T = 5; // tone mark + static final short C_A = 6; // accent mark + static final short C_P = 7; // punctuation + static final short C_D = 8; // digit + static final short C_H = 9; // halant (virama) + static final short C_O = 10; // other signs + static final short C_N = 0x0100; // nukta(ized) + static final short C_R = 0x0200; // reph(ized) + static final short C_PRE = 0x0400; // pre-base + static final short C_M_TYPE = 0x00FF; // type mask + static final short C_M_FLAGS = 0x7F00; // flag mask + // devanagari block range + static final int ccaStart = 0x0900; // first code point mapped by cca // CSOK: ConstantNameCheck + static final int ccaEnd = 0x0980; // last code point + 1 mapped by cca // CSOK: ConstantNameCheck + // devanagari character type lookups + static final short[] cca = { // CSOK: ConstantNameCheck + C_O, // 0x0900 // INVERTED CANDRABINDU + C_O, // 0x0901 // CANDRABINDU + C_O, // 0x0902 // ANUSVARA + C_O, // 0x0903 // VISARGA + C_V, // 0x0904 // SHORT A + C_V, // 0x0905 // A + C_V, // 0x0906 // AA + C_V, // 0x0907 // I + C_V, // 0x0908 // II + C_V, // 0x0909 // U + C_V, // 0x090A // UU + C_V, // 0x090B // VOCALIC R + C_V, // 0x090C // VOCALIC L + C_V, // 0x090D // CANDRA E + C_V, // 0x090E // SHORT E + C_V, // 0x090F // E + C_V, // 0x0910 // AI + C_V, // 0x0911 // CANDRA O + C_V, // 0x0912 // SHORT O + C_V, // 0x0913 // O + C_V, // 0x0914 // AU + C_C, // 0x0915 // KA + C_C, // 0x0916 // KHA + C_C, // 0x0917 // GA + C_C, // 0x0918 // GHA + C_C, // 0x0919 // NGA + C_C, // 0x091A // CA + C_C, // 0x091B // CHA + C_C, // 0x091C // JA + C_C, // 0x091D // JHA + C_C, // 0x091E // NYA + C_C, // 0x091F // TTA + C_C, // 0x0920 // TTHA + C_C, // 0x0921 // DDA + 
C_C, // 0x0922 // DDHA + C_C, // 0x0923 // NNA + C_C, // 0x0924 // TA + C_C, // 0x0925 // THA + C_C, // 0x0926 // DA + C_C, // 0x0927 // DHA + C_C, // 0x0928 // NA + C_C, // 0x0929 // NNNA + C_C, // 0x092A // PA + C_C, // 0x092B // PHA + C_C, // 0x092C // BA + C_C, // 0x092D // BHA + C_C, // 0x092E // MA + C_C, // 0x092F // YA + C_C|C_R, // 0x0930 // RA // CSOK: WhitespaceAround + C_C|C_R|C_N, // 0x0931 // RRA = 0930+093C // CSOK: WhitespaceAround + C_C, // 0x0932 // LA + C_C, // 0x0933 // LLA + C_C, // 0x0934 // LLLA + C_C, // 0x0935 // VA + C_C, // 0x0936 // SHA + C_C, // 0x0937 // SSA + C_C, // 0x0938 // SA + C_C, // 0x0939 // HA + C_M, // 0x093A // OE (KASHMIRI) + C_M, // 0x093B // OOE (KASHMIRI) + C_N, // 0x093C // NUKTA + C_S, // 0x093D // AVAGRAHA + C_M, // 0x093E // AA + C_M|C_PRE, // 0x093F // I // CSOK: WhitespaceAround + C_M, // 0x0940 // II + C_M, // 0x0941 // U + C_M, // 0x0942 // UU + C_M, // 0x0943 // VOCALIC R + C_M, // 0x0944 // VOCALIC RR + C_M, // 0x0945 // CANDRA E + C_M, // 0x0946 // SHORT E + C_M, // 0x0947 // E + C_M, // 0x0948 // AI + C_M, // 0x0949 // CANDRA O + C_M, // 0x094A // SHORT O + C_M, // 0x094B // O + C_M, // 0x094C // AU + C_H, // 0x094D // VIRAMA (HALANT) + C_M, // 0x094E // PRISHTHAMATRA E + C_M, // 0x094F // AW + C_S, // 0x0950 // OM + C_T, // 0x0951 // UDATTA + C_T, // 0x0952 // ANUDATTA + C_A, // 0x0953 // GRAVE + C_A, // 0x0954 // ACUTE + C_M, // 0x0955 // CANDRA LONG E + C_M, // 0x0956 // UE + C_M, // 0x0957 // UUE + C_C|C_N, // 0x0958 // QA // CSOK: WhitespaceAround + C_C|C_N, // 0x0959 // KHHA // CSOK: WhitespaceAround + C_C|C_N, // 0x095A // GHHA // CSOK: WhitespaceAround + C_C|C_N, // 0x095B // ZA // CSOK: WhitespaceAround + C_C|C_N, // 0x095C // DDDHA // CSOK: WhitespaceAround + C_C|C_N, // 0x095D // RHA // CSOK: WhitespaceAround + C_C|C_N, // 0x095E // FA // CSOK: WhitespaceAround + C_C|C_N, // 0x095F // YYA // CSOK: WhitespaceAround + C_V, // 0x0960 // VOCALIC RR + C_V, // 0x0961 // VOCALIC LL + C_M, // 0x0962 // 
VOCALIC RR + C_M, // 0x0963 // VOCALIC LL + C_P, // 0x0964 // DANDA + C_P, // 0x0965 // DOUBLE DANDA + C_D, // 0x0966 // ZERO + C_D, // 0x0967 // ONE + C_D, // 0x0968 // TWO + C_D, // 0x0969 // THREE + C_D, // 0x096A // FOUR + C_D, // 0x096B // FIVE + C_D, // 0x096C // SIX + C_D, // 0x096D // SEVEN + C_D, // 0x096E // EIGHT + C_D, // 0x096F // NINE + C_S, // 0x0970 // ABBREVIATION SIGN + C_S, // 0x0971 // HIGH SPACING DOT + C_V, // 0x0972 // CANDRA A (MARATHI) + C_V, // 0x0973 // OE (KASHMIRI) + C_V, // 0x0974 // OOE (KASHMIRI) + C_V, // 0x0975 // AW (KASHMIRI) + C_V, // 0x0976 // UE (KASHMIRI) + C_V, // 0x0977 // UUE (KASHMIRI) + C_U, // 0x0978 // UNASSIGNED + C_C, // 0x0979 // ZHA + C_C, // 0x097A // HEAVY YA + C_C, // 0x097B // GGAA (SINDHI) + C_C, // 0x097C // JJA (SINDHI) + C_C, // 0x097D // GLOTTAL STOP (LIMBU) + C_C, // 0x097E // DDDA (SINDHI) + C_C // 0x097F // BBA (SINDHI) + }; + static int typeOf(int c) { + if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { + return cca [ c - ccaStart ] & C_M_TYPE; + } else { + return C_U; + } + } + static boolean isType(int c, int t) { + return typeOf ( c ) == t; + } + static boolean hasFlag(int c, int f) { + if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { + return ( cca [ c - ccaStart ] & f ) == f; + } else { + return false; + } + } + static boolean isC(int c) { + return isType(c,C_C); + } + static boolean isR(int c) { + return isType(c,C_C) && hasR(c); + } + static boolean isV(int c) { + return isType(c,C_V); + } + static boolean isN(int c) { + return c == 0x093C; + } + static boolean isH(int c) { + return c == 0x094D; + } + static boolean isM(int c) { + return isType(c,C_M); + } + static boolean isPreM(int c) { + return isType(c,C_M) && hasFlag(c,C_PRE); + } + static boolean isX(int c) { + switch ( typeOf ( c ) ) { + case C_M: // matra (combining vowel) + case C_A: // accent mark + case C_T: // tone mark + case C_O: // other (modifying) mark + return true; + default: + return false; + } + } + static boolean hasR(int c) { + 
return hasFlag(c,C_R); + } + static boolean hasN(int c) { + return hasFlag(c,C_N); + } + +} Added: xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/GujaratiScriptProcessor.java URL: http://svn.apache.org/viewvc/xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/GujaratiScriptProcessor.java?rev=1293736&view=auto ============================================================================== --- xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/GujaratiScriptProcessor.java (added) +++ xmlgraphics/fop/trunk/src/java/org/apache/fop/complexscripts/scripts/GujaratiScriptProcessor.java Sun Feb 26 02:29:01 2012 @@ -0,0 +1,537 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.complexscripts.scripts; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.fop.complexscripts.util.GlyphSequence; +import org.apache.fop.complexscripts.util.ScriptContextTester; + +// CSOFF: AvoidNestedBlocksCheck +// CSOFF: NoWhitespaceAfterCheck +// CSOFF: WhitespaceAfter +// CSOFF: InnerAssignmentCheck +// CSOFF: SimplifyBooleanReturnCheck +// CSOFF: LineLengthCheck + +/** + *

The GujaratiScriptProcessor class implements a script processor for performing glyph substitution and positioning operations on content associated with the Gujarati script.

+ * @author Glenn Adams + */ +public class GujaratiScriptProcessor extends IndicScriptProcessor { + + /** logging instance */ + private static final Log log = LogFactory.getLog(GujaratiScriptProcessor.class); // CSOK: ConstantNameCheck + + GujaratiScriptProcessor ( String script ) { + super ( script ); + } + + @Override + protected Class getSyllabizerClass() { + return GujaratiSyllabizer.class; + } + + @Override + // find rightmost pre-base matra + protected int findPreBaseMatra ( GlyphSequence gs ) { + int ng = gs.getGlyphCount(); + int lk = -1; + for ( int i = ng; i > 0; i-- ) { + int k = i - 1; + if ( containsPreBaseMatra ( gs, k ) ) { + lk = k; + break; + } + } + return lk; + } + + @Override + // find leftmost pre-base matra target, starting from source + protected int findPreBaseMatraTarget ( GlyphSequence gs, int source ) { + int ng = gs.getGlyphCount(); + int lk = -1; + for ( int i = ( source < ng ) ? source : ng; i > 0; i-- ) { + int k = i - 1; + if ( containsConsonant ( gs, k ) ) { + if ( containsHalfConsonant ( gs, k ) ) { + lk = k; + } else if ( lk == -1 ) { + lk = k; + } else { + break; + } + } + } + return lk; + } + + private static boolean containsPreBaseMatra ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + if ( isPreM ( ca [ i ] ) ) { + return true; + } + } + return false; + } + + private static boolean containsConsonant ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + if ( isC ( ca [ i ] ) ) { + return true; + } + } + return false; + } + + private static boolean containsHalfConsonant ( GlyphSequence gs, int k ) { + Boolean half = (Boolean) gs.getAssociation ( k ) . getPredication ( "half" ); + return ( half != null ) ? 
half.booleanValue() : false; + } + + @Override + protected int findReph ( GlyphSequence gs ) { + int ng = gs.getGlyphCount(); + int li = -1; + for ( int i = 0; i < ng; i++ ) { + if ( containsReph ( gs, i ) ) { + li = i; + break; + } + } + return li; + } + + @Override + protected int findRephTarget ( GlyphSequence gs, int source ) { + int ng = gs.getGlyphCount(); + int c1 = -1; + int c2 = -1; + // first candidate target is after first non-half consonant + for ( int i = 0; i < ng; i++ ) { + if ( ( i != source ) && containsConsonant ( gs, i ) ) { + if ( ! containsHalfConsonant ( gs, i ) ) { + c1 = i + 1; + break; + } + } + } + // second candidate target is after last non-prebase matra after first candidate or before first syllable or vedic mark + for ( int i = ( c1 >= 0 ) ? c1 : 0; i < ng; i++ ) { + if ( containsMatra ( gs, i ) && ! containsPreBaseMatra ( gs, i ) ) { + c2 = i + 1; + } else if ( containsOtherMark ( gs, i ) ) { + c2 = i; + break; + } + } + if ( c2 >= 0 ) { + return c2; + } else if ( c1 >= 0 ) { + return c1; + } else { + return source; + } + } + + private static boolean containsReph ( GlyphSequence gs, int k ) { + Boolean rphf = (Boolean) gs.getAssociation ( k ) . getPredication ( "rphf" ); + return ( rphf != null ) ? 
rphf.booleanValue() : false; + } + + private static boolean containsMatra ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + if ( isM ( ca [ i ] ) ) { + return true; + } + } + return false; + } + + private static boolean containsOtherMark ( GlyphSequence gs, int k ) { + GlyphSequence.CharAssociation a = gs.getAssociation ( k ); + int[] ca = gs.getCharacterArray ( false ); + for ( int i = a.getStart(), e = a.getEnd(); i < e; i++ ) { + switch ( typeOf ( ca [ i ] ) ) { + case C_T: // tone (e.g., udatta, anudatta) + case C_A: // accent (e.g., acute, grave) + case C_O: // other (e.g., candrabindu, anusvara, visarga, etc) + return true; + default: + break; + } + } + return false; + } + + private static class GujaratiSyllabizer extends DefaultSyllabizer { + GujaratiSyllabizer ( String script, String language ) { + super ( script, language ); + } + @Override + // | C ... + protected int findStartOfSyllable ( int[] ca, int s, int e ) { + if ( ( s < 0 ) || ( s >= e ) ) { + return -1; + } else { + while ( s < e ) { + int c = ca [ s ]; + if ( isC ( c ) ) { + break; + } else { + s++; + } + } + return s; + } + } + @Override + // D* L? | ... + protected int findEndOfSyllable ( int[] ca, int s, int e ) { + if ( ( s < 0 ) || ( s >= e ) ) { + return -1; + } else { + int nd = 0; + int nl = 0; + int i; + // consume dead consonants + while ( ( i = isDeadConsonant ( ca, s, e ) ) > s ) { + s = i; nd++; + } + // consume zero or one live consonant + if ( ( i = isLiveConsonant ( ca, s, e ) ) > s ) { + s = i; nl++; + } + return ( ( nd > 0 ) || ( nl > 0 ) ) ? s : -1; + } + } + // D := ( C N? H )? 
+ private int isDeadConsonant ( int[] ca, int s, int e ) { + if ( s < 0 ) { + return -1; + } else { + int c, i = 0; + int nc = 0, nh = 0; + do { + // C + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isC ( c ) ) { + i++; + nc++; + } else { + break; + } + } + // N? + if ( ( s + i ) < e ) { + c = ca [ s + 1 ]; + if ( isN ( c ) ) { + i++; + } + } + // H + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isH ( c ) ) { + i++; + nh++; + } else { + break; + } + } + } while ( false ); + return ( nc > 0 ) && ( nh > 0 ) ? s + i : -1; + } + } + // L := ( (C|V) N? X* )?; where X = ( MATRA | ACCENT MARK | TONE MARK | OTHER MARK ) + private int isLiveConsonant ( int[] ca, int s, int e ) { + if ( s < 0 ) { + return -1; + } else { + int c, i = 0; + int nc = 0, nv = 0, nx = 0; + do { + // C + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isC ( c ) ) { + i++; + nc++; + } else if ( isV ( c ) ) { + i++; + nv++; + } else { + break; + } + } + // N? + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isN ( c ) ) { + i++; + } + } + // X* + while ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isX ( c ) ) { + i++; + nx++; + } else { + break; + } + } + } while ( false ); + // if no X but has H, then ignore C|I + if ( nx == 0 ) { + if ( ( s + i ) < e ) { + c = ca [ s + i ]; + if ( isH ( c ) ) { + if ( nc > 0 ) { + nc--; + } else if ( nv > 0 ) { + nv--; + } + } + } + } + return ( ( nc > 0 ) || ( nv > 0 ) ) ? 
s + i : -1; + } + } + } + + // gujarati character types + static final short C_U = 0; // unassigned + static final short C_C = 1; // consonant + static final short C_V = 2; // vowel + static final short C_M = 3; // vowel sign (matra) + static final short C_S = 4; // symbol or sign + static final short C_T = 5; // tone mark + static final short C_A = 6; // accent mark + static final short C_P = 7; // punctuation + static final short C_D = 8; // digit + static final short C_H = 9; // halant (virama) + static final short C_O = 10; // other signs + static final short C_N = 0x0100; // nukta(ized) + static final short C_R = 0x0200; // reph(ized) + static final short C_PRE = 0x0400; // pre-base + static final short C_M_TYPE = 0x00FF; // type mask + static final short C_M_FLAGS = 0x7F00; // flag mask + // gujarati block range + static final int ccaStart = 0x0A80; // first code point mapped by cca // CSOK: ConstantNameCheck + static final int ccaEnd = 0x0B00; // last code point + 1 mapped by cca // CSOK: ConstantNameCheck + // gujarati character type lookups + static final short[] cca = { // CSOK: ConstantNameCheck + C_U, // 0x0A80 // UNASSIGNED + C_O, // 0x0A81 // CANDRABINDU + C_O, // 0x0A82 // ANUSVARA + C_O, // 0x0A83 // VISARGA + C_U, // 0x0A84 // UNASSIGNED + C_V, // 0x0A85 // A + C_V, // 0x0A86 // AA + C_V, // 0x0A87 // I + C_V, // 0x0A88 // II + C_V, // 0x0A89 // U + C_V, // 0x0A8A // UU + C_V, // 0x0A8B // VOCALIC R + C_V, // 0x0A8C // VOCALIC L + C_V, // 0x0A8D // CANDRA E + C_U, // 0x0A8E // UNASSIGNED + C_V, // 0x0A8F // E + C_V, // 0x0A90 // AI + C_V, // 0x0A91 // CANDRA O + C_U, // 0x0A92 // UNASSIGNED + C_V, // 0x0A93 // O + C_V, // 0x0A94 // AU + C_C, // 0x0A95 // KA + C_C, // 0x0A96 // KHA + C_C, // 0x0A97 // GA + C_C, // 0x0A98 // GHA + C_C, // 0x0A99 // NGA + C_C, // 0x0A9A // CA + C_C, // 0x0A9B // CHA + C_C, // 0x0A9C // JA + C_C, // 0x0A9D // JHA + C_C, // 0x0A9E // NYA + C_C, // 0x0A9F // TTA + C_C, // 0x0AA0 // TTHA + C_C, // 0x0AA1 // DDA + C_C, // 
0x0AA2 // DDHA + C_C, // 0x0AA3 // NNA + C_C, // 0x0AA4 // TA + C_C, // 0x0AA5 // THA + C_C, // 0x0AA6 // DA + C_C, // 0x0AA7 // DHA + C_C, // 0x0AA8 // NA + C_U, // 0x0AA9 // UNASSIGNED + C_C, // 0x0AAA // PA + C_C, // 0x0AAB // PHA + C_C, // 0x0AAC // BA + C_C, // 0x0AAD // BHA + C_C, // 0x0AAE // MA + C_C, // 0x0AAF // YA + C_C|C_R, // 0x0AB0 // RA // CSOK: WhitespaceAround + C_U, // 0x0AB1 // UNASSIGNED + C_C, // 0x0AB2 // LA + C_C, // 0x0AB3 // LLA + C_U, // 0x0AB4 // UNASSIGNED + C_C, // 0x0AB5 // VA + C_C, // 0x0AB6 // SHA + C_C, // 0x0AB7 // SSA + C_C, // 0x0AB8 // SA + C_C, // 0x0AB9 // HA + C_U, // 0x0ABA // UNASSIGNED + C_U, // 0x0ABB // UNASSIGNED + C_N, // 0x0ABC // NUKTA + C_S, // 0x0ABD // AVAGRAHA + C_M, // 0x0ABE // AA + C_M|C_PRE, // 0x0ABF // I // CSOK: WhitespaceAround + C_M, // 0x0AC0 // II + C_M, // 0x0AC1 // U + C_M, // 0x0AC2 // UU + C_M, // 0x0AC3 // VOCALIC R + C_M, // 0x0AC4 // VOCALIC RR + C_M, // 0x0AC5 // CANDRA E + C_U, // 0x0AC6 // UNASSIGNED + C_M, // 0x0AC7 // E + C_M, // 0x0AC8 // AI + C_M, // 0x0AC9 // CANDRA O + C_U, // 0x0ACA // UNASSIGNED + C_M, // 0x0ACB // O + C_M, // 0x0ACC // AU + C_H, // 0x0ACD // VIRAMA (HALANT) + C_U, // 0x0ACE // UNASSIGNED + C_U, // 0x0ACF // UNASSIGNED + C_S, // 0x0AD0 // OM + C_U, // 0x0AD1 // UNASSIGNED + C_U, // 0x0AD2 // UNASSIGNED + C_U, // 0x0AD3 // UNASSIGNED + C_U, // 0x0AD4 // UNASSIGNED + C_U, // 0x0AD5 // UNASSIGNED + C_U, // 0x0AD6 // UNASSIGNED + C_U, // 0x0AD7 // UNASSIGNED + C_U, // 0x0AD8 // UNASSIGNED + C_U, // 0x0AD9 // UNASSIGNED + C_U, // 0x0ADA // UNASSIGNED + C_U, // 0x0ADB // UNASSIGNED + C_U, // 0x0ADC // UNASSIGNED + C_U, // 0x0ADD // UNASSIGNED + C_U, // 0x0ADE // UNASSIGNED + C_U, // 0x0ADF // UNASSIGNED + C_V, // 0x0AE0 // VOCALIC RR + C_V, // 0x0AE1 // VOCALIC LL + C_M, // 0x0AE2 // VOCALIC L + C_M, // 0x0AE3 // VOCALIC LL + C_U, // 0x0AE4 // UNASSIGNED + C_U, // 0x0AE5 // UNASSIGNED + C_D, // 0x0AE6 // ZERO + C_D, // 0x0AE7 // ONE + C_D, // 0x0AE8 // TWO + C_D, // 0x0AE9 
// THREE + C_D, // 0x0AEA // FOUR + C_D, // 0x0AEB // FIVE + C_D, // 0x0AEC // SIX + C_D, // 0x0AED // SEVEN + C_D, // 0x0AEE // EIGHT + C_D, // 0x0AEF // NINE + C_U, // 0x0AF0 // UNASSIGNED + C_S, // 0x0AF1 // RUPEE SIGN + C_U, // 0x0AF2 // UNASSIGNED + C_U, // 0x0AF3 // UNASSIGNED + C_U, // 0x0AF4 // UNASSIGNED + C_U, // 0x0AF5 // UNASSIGNED + C_U, // 0x0AF6 // UNASSIGNED + C_U, // 0x0AF7 // UNASSIGNED + C_U, // 0x0AF8 // UNASSIGNED + C_U, // 0x0AF9 // UNASSIGNED + C_U, // 0x0AFA // UNASSIGNED + C_U, // 0x0AFB // UNASSIGNED + C_U, // 0x0AFC // UNASSIGNED + C_U, // 0x0AFD // UNASSIGNED + C_U, // 0x0AFE // UNASSIGNED + C_U // 0x0AFF // UNASSIGNED + }; + static int typeOf(int c) { + if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { + return cca [ c - ccaStart ] & C_M_TYPE; + } else { + return C_U; + } + } + static boolean isType(int c, int t) { + return typeOf ( c ) == t; + } + static boolean hasFlag(int c, int f) { + if ( ( c >= ccaStart ) && ( c < ccaEnd ) ) { + return ( cca [ c - ccaStart ] & f ) == f; + } else { + return false; + } + } + static boolean isC(int c) { + return isType(c,C_C); + } + static boolean isR(int c) { + return isType(c,C_C) && hasR(c); + } + static boolean isV(int c) { + return isType(c,C_V); + } + static boolean isN(int c) { + return c == 0x0ABC; + } + static boolean isH(int c) { + return c == 0x0ACD; + } + static boolean isM(int c) { + return isType(c,C_M); + } + static boolean isPreM(int c) { + return isType(c,C_M) && hasFlag(c,C_PRE); + } + static boolean isX(int c) { + switch ( typeOf ( c ) ) { + case C_M: // matra (combining vowel) + case C_A: // accent mark + case C_T: // tone mark + case C_O: // other (modifying) mark + return true; + default: + return false; + } + } + static boolean hasR(int c) { + return hasFlag(c,C_R); + } + static boolean hasN(int c) { + return hasFlag(c,C_N); + } + +} --------------------------------------------------------------------- To unsubscribe, e-mail: fop-commits-unsubscribe@xmlgraphics.apache.org For 
additional commands, e-mail: fop-commits-help@xmlgraphics.apache.org