harmony-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From var...@apache.org
Subject svn commit: r538551 [1/2] - in /harmony/enhanced/drlvm/trunk/vm/jitrino: config/em64t/ config/ia32/ src/codegenerator/ia32/
Date Wed, 16 May 2007 11:58:36 GMT
Author: varlax
Date: Wed May 16 04:58:35 2007
New Revision: 538551

URL: http://svn.apache.org/viewvc?view=rev&rev=538551
Log:
Applied HARMONY-3636 [DRLVM][jit]Color-graph register allocator enable in server mode

Added:
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h   (with props)
Modified:
    harmony/enhanced/drlvm/trunk/vm/jitrino/config/em64t/server.emconf
    harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.cpp
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.h
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32WebMaker.cpp

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/config/em64t/server.emconf
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/config/em64t/server.emconf?view=diff&rev=538551&r1=538550&r2=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/config/em64t/server.emconf (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/config/em64t/server.emconf Wed May 16 04:58:35 2007
@@ -50,15 +50,11 @@
 -XX:jit.SD1_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l-,early_prop-,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce-,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method
 -XX:jit.SD1_OPT.path.dce1=cg_dce
 -XX:jit.SD1_OPT.path.dce2=cg_dce
--XX:jit.SD1_OPT.path.regalloc=bp_regalloc1,bp_regalloc2
--XX:jit.SD1_OPT.path.bp_regalloc1=bp_regalloc
--XX:jit.SD1_OPT.path.bp_regalloc2=bp_regalloc
 
 -XX:jit.SD1_OPT.arg.codegen.dce1.early=yes
--XX:jit.SD1_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP
--XX:jit.SD1_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM
-
 
+-XX:jit.SD1_OPT.path.regalloc=webmaker,cg_regalloc,spillgen
+-XX:jit.SD1_OPT.arg.codegen.regalloc.webmaker.calc=true
 
 -XX:jit.SD2_OPT.path=opt_init,translator,optimizer,hir2lir,codegen
 
@@ -66,9 +62,6 @@
 -XX:jit.SD2_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l-,early_prop-,itrace-,native,cg_fastArrayFill,constraints,dce2,regalloc,spillgen,layout,copy,rce-,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method
 -XX:jit.SD2_OPT.path.dce1=cg_dce
 -XX:jit.SD2_OPT.path.dce2=cg_dce
--XX:jit.SD2_OPT.path.regalloc=bp_regalloc1,bp_regalloc2
--XX:jit.SD2_OPT.path.bp_regalloc1=bp_regalloc
--XX:jit.SD2_OPT.path.bp_regalloc2=bp_regalloc
 
 #devirt configuration
 -XX:jit.SD2_OPT.path.devirt_virtual=devirt
@@ -130,9 +123,10 @@
 
 
 -XX:jit.SD2_OPT.arg.codegen.dce1.early=yes
--XX:jit.SD2_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP
--XX:jit.SD2_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM
 -XX:jit.arg.codegen.emitter.align=0
+
+-XX:jit.SD2_OPT.path.regalloc=webmaker,cg_regalloc,spillgen
+-XX:jit.SD2_OPT.arg.codegen.regalloc.webmaker.calc=true
 
 #system properties
 -Djava.compiler=server

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf?view=diff&rev=538551&r1=538550&r2=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf Wed May 16 04:58:35 2007
@@ -50,16 +50,13 @@
 -XX:jit.SD1_OPT.path.codegen=lock_method,bbp,btr,gcpoints,cafl,dce1,i8l,api_magic,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,spillgen,copy,i586,layout,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method
 -XX:jit.SD1_OPT.path.dce1=cg_dce
 -XX:jit.SD1_OPT.path.dce2=cg_dce
--XX:jit.SD1_OPT.path.regalloc=bp_regalloc1,bp_regalloc2
--XX:jit.SD1_OPT.path.bp_regalloc1=bp_regalloc
--XX:jit.SD1_OPT.path.bp_regalloc2=bp_regalloc
 
 -XX:jit.SD1_OPT.arg.codegen.dce1.early=yes
--XX:jit.SD1_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP
--XX:jit.SD1_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM
 -XX:jit.SD1_OPT.arg.codegen.btr.insertCMOVs=no
 -XX:jit.SD1_OPT.arg.codegen.btr.removeConstCompare=yes
 
+-XX:jit.SD1_OPT.path.regalloc=webmaker,cg_regalloc,spillgen
+-XX:jit.SD1_OPT.arg.codegen.regalloc.webmaker.calc=true
 
 -XX:jit.SD2_OPT.path=opt_init,translator,optimizer,hir2lir,codegen
 
@@ -67,9 +64,6 @@
 -XX:jit.SD2_OPT.path.codegen=lock_method,bbp,btr,gcpoints,cafl,dce1,i8l,api_magic,early_prop,peephole,itrace-,native,cg_fastArrayFill,constraints,dce2,regalloc,spillgen,copy,i586,layout,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method
 -XX:jit.SD2_OPT.path.dce1=cg_dce
 -XX:jit.SD2_OPT.path.dce2=cg_dce
--XX:jit.SD2_OPT.path.regalloc=bp_regalloc1,bp_regalloc2
--XX:jit.SD2_OPT.path.bp_regalloc1=bp_regalloc
--XX:jit.SD2_OPT.path.bp_regalloc2=bp_regalloc
 
 #devirt configuration
 -XX:jit.SD2_OPT.path.devirt_virtual=devirt
@@ -129,12 +123,12 @@
 -XX:jit.SD2_OPT.arg.optimizer.inline_helpers.instanceOf_hotnessPercent=1
 
 -XX:jit.SD2_OPT.arg.codegen.dce1.early=yes
--XX:jit.SD2_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP
--XX:jit.SD2_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM
 -XX:jit.SD2_OPT.arg.codegen.btr.insertCMOVs=no
 -XX:jit.SD2_OPT.arg.codegen.btr.removeConstCompare=yes
 -XX:jit.arg.codegen.emitter.align=4
 
+-XX:jit.SD2_OPT.path.regalloc=webmaker,cg_regalloc,spillgen
+-XX:jit.SD2_OPT.arg.codegen.regalloc.webmaker.calc=true
 
 #system properties
 -Djava.compiler=server

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp?view=diff&rev=538551&r1=538550&r2=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp Wed May 16 04:58:35 2007
@@ -19,7 +19,7 @@
  * @version $Revision: 1.11.14.2.4.3 $
  */
 
-#include "Ia32IRManager.h"
+#include "Ia32ConstraintsResolver.h"
 #include "Ia32Printer.h"
 
 namespace Jitrino
@@ -207,108 +207,6 @@
  *      For more details please refer to ConstraintsResolverImpl source code
  */
 
-class ConstraintsResolverImpl
-{
-public:
-    ConstraintsResolverImpl(IRManager &irm)
-        :irManager(irm), 
-        memoryManager(irManager.getOpndCount()*16, "ConstraintsResolverImpl"),
-        basicBlocks(memoryManager, 0), originalOpndCount(0),
-        liveOpnds(memoryManager,0),
-        liveAtDispatchBlockEntry(memoryManager,0),
-        needsOriginalOpnd(memoryManager,0),
-        hoveringOpnds(memoryManager,0),
-        opndReplaceWorkset(memoryManager,0),
-        opndUsage(memoryManager,0)
-    {
-    }
-    
-    void run();
-
-private:
-    /** Get the priority of a the node for sorting in createBasicBlockArray */
-    double getBasicBlockPriority(Node * node);
-    /** Fills the basicBlocks array and orders it according to block exec count (hottest first) */ 
-    void createBasicBlockArray();
-    /** Fills the liveAtDispatchBlockEntry bit set with operands live at dispatch node entries */ 
-    void calculateLiveAtDispatchBlockEntries();
-    /** Pre-sets calculated constraints for each operand */ 
-    void calculateStartupOpndConstraints();
-    /** Scans basicBlocks array and calls resolveConstraints(BasicBlock * bb) for each entry */
-    void resolveConstraints();
-    /** Traverses instructions of bb and calls resolveConstraints(Inst *) 
-     * for each inst 
-     */
-    void resolveConstraints(Node* bb);
-    /**
-     Main logic of constraint resolution for each instrution
-     */
-    void resolveConstraintsWithOG(Inst * inst);
-
-    /** returns constraint describing call-safe locations for opnd in CallInst inst */
-    Constraint getCalleeSaveConstraint(Inst * inst, Opnd * opnd);
-    /** returns constraint describing safe locations for operands live at dispatch node entries */
-    Constraint getDispatchEntryConstraint(Opnd * opnd);
-
-    static bool constraintIsWorse(Constraint cnew, Constraint cold, unsigned normedBBExecCount, 
-        unsigned splitThresholdForNoRegs, unsigned splitThresholdFor1Reg, unsigned splitThresholdFor4Regs
-        );
-   
-
-
-    /** Reference to IRManager */ 
-    IRManager&                  irManager;
-
-    /** Private memory manager for this algorithm */ 
-    MemoryManager               memoryManager;
-
-    /** Array of basic blocks to be handled */ 
-    Nodes                       basicBlocks;
-
-    /** result of irManager.getOpndCount before the pass */ 
-    uint32                      originalOpndCount;
-
-    /** Current live set, updated as usual for each instruction in resolveConstraints(Inst*) */ 
-    BitSet                      liveOpnds;
-
-    /** Bit set of operands live at dispatch node entries */ 
-    BitSet                      liveAtDispatchBlockEntry;
-
-    /** Bit set of operands for which original operand should be used wherever possible.
-     *  Currently this is only for operands which are live at dispatch block entries.
-     */ 
-    BitSet                      needsOriginalOpnd;
-
-    /** Temporary bit set of hovering operands (live across call sites)
-     *  Is initialized and used only during resolveConstraints(Inst*)
-     */ 
-    BitSet                      hoveringOpnds;
-
-    /** An array of current substitutions for operands
-     *  Is filled as a result of operand splitting.
-     *  Reset for each basic blocks (all replacements are local within basic blocks)
-     */
-    StlVector<Opnd*>            opndReplaceWorkset;
-
-
-    StlVector<uint32>           opndUsage;
-
-    unsigned callSplitThresholdForNoRegs;
-    unsigned callSplitThresholdFor1Reg;
-    unsigned callSplitThresholdFor4Regs;
-
-    unsigned defSplitThresholdForNoRegs;
-    unsigned defSplitThresholdFor1Reg;
-    unsigned defSplitThresholdFor4Regs;
-
-    unsigned useSplitThresholdForNoRegs;
-    unsigned useSplitThresholdFor1Reg;
-    unsigned useSplitThresholdFor4Regs;
-
-    friend class ConstraintsResolver;
-};
-
-
 //_________________________________________________________________________________________________
 Constraint ConstraintsResolverImpl::getCalleeSaveConstraint(Inst * inst, Opnd * opnd)
 {
@@ -331,7 +229,8 @@
 void ConstraintsResolverImpl::run()
 {
 //  Set all instruction constraints for EntryPoints, CALLs and RETs
-    irManager.applyCallingConventions();
+    if (!second)
+        irManager.applyCallingConventions();
 // Initialization 
     originalOpndCount=irManager.getOpndCount();
     liveOpnds.resizeClear(originalOpndCount);
@@ -682,25 +581,16 @@
     // call the private implementation of the algorithm
     ConstraintsResolverImpl impl(*irManager);
     
-    impl.callSplitThresholdForNoRegs = (unsigned)-1; // always
     getArg("callSplitThresholdForNoRegs", impl.callSplitThresholdForNoRegs);
-    impl.callSplitThresholdFor1Reg = 1; // for very cold code
     getArg("callSplitThresholdFor1Reg", impl.callSplitThresholdFor1Reg);
-    impl.callSplitThresholdFor4Regs = 1; // for very cold code
     getArg("callSplitThresholdFor4Regs", impl.callSplitThresholdFor4Regs);
 
-    impl.defSplitThresholdForNoRegs = 0; // never
     getArg("defSplitThresholdForNoRegs", impl.defSplitThresholdForNoRegs);
-    impl.defSplitThresholdFor1Reg = 0; // never
     getArg("defSplitThresholdFor1Reg", impl.defSplitThresholdFor1Reg);
-    impl.defSplitThresholdFor4Regs = 0; // never
     getArg("defSplitThresholdFor4Regs", impl.defSplitThresholdFor4Regs);
 
-    impl.useSplitThresholdForNoRegs = 0; // never
     getArg("useSplitThresholdForNoRegs", impl.useSplitThresholdForNoRegs);
-    impl.useSplitThresholdFor1Reg = 0; // never
     getArg("useSplitThresholdFor1Reg", impl.useSplitThresholdFor1Reg);
-    impl.useSplitThresholdFor4Regs = 0; // never
     getArg("useSplitThresholdFor4Regs", impl.useSplitThresholdFor4Regs);
 
     impl.run();
@@ -710,4 +600,5 @@
 //_________________________________________________________________________________________________
 
 }}; //namespace Ia32
+
 

Added: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h?view=auto&rev=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h (added)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h Wed May 16 04:58:35 2007
@@ -0,0 +1,283 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+/**
+ * @author Vyacheslav P. Shakin
+ * @version $Revision: 1.11.14.2.4.3 $
+ */
+
+#include "Ia32IRManager.h"
+//#include "Ia32Printer.h"
+
+namespace Jitrino
+{
+namespace Ia32{
+
+//========================================================================================
+// class Ia32ConstraintsResolver
+//========================================================================================
+/**
+ *  class Ia32ConstraintsResolver performs resolution of operand constraints
+ *  and assigns calculated constraints (Opnd::ConstraintKind_Calculated) to operands.
+ *  The resulting calculated constraints of operands determine allowable physical location
+ *  for the operand.
+ *  
+ *  This transformer allows to insert operands into instructions before it 
+ *  regardless instruction constraints except that Initial constraints of explicit 
+ *  instruction operands must have non-null intersections with corresponding constraints 
+ *  of at least one opcode group of the instruction.
+ *  
+ *  ConstraintResolver analyzes instruction constraints and splits operands when necessary.
+ * 
+ *  This transformer ensures that 
+ *  1)  All instruction constraints for EntryPoints, CALLs and RETs
+ *      are set appropriately (IRManager::applyCallingConventions())
+ *  2)  All operands has non-null calculated constraints 
+ *  3)  All operands fits into instructions they are used in (in terms of instruction constraints)
+ *      For example:
+ *          Original code piece:
+ *              I38: (AD:s65:double) =CopyPseudoInst (AU:t1:double) 
+ *              I32: MULSD .s65.:double,.t2:double 
+ *              I33: RET t66(0):int16 (AU:s65:double) 
+ *          
+ *              RET imposes constraint on s65 requiring to place it into FP0 register 
+ *              (its FP0D alias in this particular case)
+ *              MULSD imposes constraint on s65 requiring to place it into XMM register 
+ *                      
+ *          After the pass: 
+ *              I38: (AD:s65:double) =CopyPseudoInst (AU:t1:double) 
+ *              I32: MULSD .s65.:double,.t2:double 
+ *              I46: (AD:t75:double) =CopyPseudoInst (AU:s65:double) 
+ *              I33: RET t66(20):int16 (AU:t75:double) 
+ *          
+ *              Thus, ConstraintResolver inserted I46 splitting s65 to s65 and t75
+ *              s65 is assigned with Mem|XMM calculated constraint and t75 
+ *              is assigned with FP0D calculated calculated constraint
+ *
+ *  4)  If the live range of an operand crosses a call site and the operand is not redefined 
+ *      in the call site, the calculated constraint of the operand is narrowed the callee-save regs 
+ *      or memory (stack)
+ *      
+ *  5)  If the operand (referred as original operand here) is live at entry of a catch handler 
+ *      then necessary operand splitting is performed as close as possible to the instruction 
+ *      which caused the splitting and original operand is used before and after the instruction. 
+ *      
+ *  The main principle of the algorithm is anding of instruction constraints into
+ *  operand calculated constraints and splitting operands to ensure that the calculated constraint
+ *  is not null
+ *
+ *  This transformer must be inserted before register allocator which relies on 
+ *  calculated operand constraints. 
+ *
+ *  The implementation of this transformer is located in the ConstraintResolverImpl class. 
+ *
+ */
+
+
+//========================================================================================
+// class ConstraintsResolverImpl
+//========================================================================================
+
+/**
+ *  class Ia32ConstraintsResolverImpl is an implementation of simple constraint resolution algorithm
+ *  The algorithm takes one-pass over CFG.
+ *
+ *  The algorithm works as follows: 
+ *      
+ *  1)  Creates an array of basic blocks and orders by bb->getExecCount() 
+ *      in createBasicBlockArray().
+ *      Thus, the algorithm handles hottest basic blocks first and constraints are assigned to operands first 
+ *      from the most frequently used instructions
+ *
+ *  2)  Collects a bit vector of all operands live at entries of all dispatch node entries
+ *      in calculateLiveAtDispatchBlockEntries()
+ *
+ *  3)  For all operands:
+ *      - If an operand has already been assigned to some location 
+ *        (its location constraint is not null) the calculated constraint is set to 
+ *        the location constraint
+ *           
+ *      - If an operand is live at entry of a dispatch node 
+ *        the calculated constraint is set to the constraint 
+ *        preserving operand values during exception throwing
+ *        This constraint is returned by getDispatchEntryConstraint
+ *        In fact this is the constriant for the DRL calling convention
+ *
+ *      This is done in calculateStartupOpndConstraints()
+ *      Originally all calculateed constraints are equial to Initial constraints
+ *
+ *  4)  Walks through all basic blocks collected and arranged at step 1
+ *      in resolveConstraints()
+ *
+ *      The opndReplaceWorkset array of operand replacements is maintained 
+ *      (indexed by from-operand id). 
+ *
+ *      This is the array of current replacement for operands
+ *      and is reset for each basic block (local within basic blocks) 
+ *
+ *      This array is filled as a result of operand splitting and indicates
+ *      which operand must be used instead of original ones for all the instructions
+ *      above the one caused splitting
+ *      
+ *      4.1) Walks throw all instruction of a basic block in backward order 
+ *          in resolveConstraints(BasicBlock * bb) 
+ *          4.1.1) resolves constraints for each instruction 
+ *              in resolveConstraints(Inst * inst);
+ *          
+ *              To do this already collected calculated constraint of 
+ *              either original operand or its current replacement is anded 
+ *              with instruction constraint for this operand occurence and
+ *              if the result is null, new operand is created and substituted instead
+ *                              
+ *              4.1.1.1) All def operands of the isntruction are traversed
+ *                  and operand splitting is performed after the instruction (when necessary)
+ *                  def&use cases are also handled during this step 
+ *              4.1.1.2) If the instruction is CALL, all hovering operands of 
+ *                  the isntruction are traversed.
+ *
+ *                  Hovering operands are operands which are live across a call site and are not
+ *                  redefined in the call site
+ *                  This step ensures operands are saved in callee-save regs or memory 
+ *                  and takes into account whether an operand is live at dispatch node entries
+ *
+ *                  Operand splitting is performed before the instruction (when necessary)
+ *              4.1.1.3) All use operands of the instruction are traversed
+ *                  and operand splitting is performed before the instruction (when necessary)
+ *              
+ *      The current implementation doesn't deal properly with conditional memory constraints.
+ *      I.e. it doesn't resolve properly things like ADD m, m when both operands are already
+ *      assigned.
+ * 
+ *      For more details please refer to ConstraintsResolverImpl source code
+ */
+
+class ConstraintsResolverImpl
+{
+public:
+    ConstraintsResolverImpl(IRManager &irm, bool _second = false)
+        :irManager(irm), 
+        memoryManager(irManager.getOpndCount()*16, "ConstraintsResolverImpl"),
+        basicBlocks(memoryManager, 0), originalOpndCount(0),
+        liveOpnds(memoryManager,0),
+        liveAtDispatchBlockEntry(memoryManager,0),
+        needsOriginalOpnd(memoryManager,0),
+        hoveringOpnds(memoryManager,0),
+        opndReplaceWorkset(memoryManager,0),
+        opndUsage(memoryManager,0),
+
+        callSplitThresholdForNoRegs((unsigned)-1),  // always
+        callSplitThresholdFor1Reg(1),               // for very cold code
+        callSplitThresholdFor4Regs(1),              // for very cold code
+        defSplitThresholdForNoRegs(0),              // never
+        defSplitThresholdFor1Reg(0),                // never
+        defSplitThresholdFor4Regs(0),               // never
+        useSplitThresholdForNoRegs(0),              // never
+        useSplitThresholdFor1Reg(0),                // never
+        useSplitThresholdFor4Regs(0),               // never
+
+        second(_second)
+    {
+    }
+    
+    void run();
+
+private:
+    /** Get the priority of a the node for sorting in createBasicBlockArray */
+    double getBasicBlockPriority(Node * node);
+    /** Fills the basicBlocks array and orders it according to block exec count (hottest first) */ 
+    void createBasicBlockArray();
+    /** Fills the liveAtDispatchBlockEntry bit set with operands live at dispatch node entries */ 
+    void calculateLiveAtDispatchBlockEntries();
+    /** Pre-sets calculated constraints for each operand */ 
+    void calculateStartupOpndConstraints();
+    /** Scans basicBlocks array and calls resolveConstraints(BasicBlock * bb) for each entry */
+    void resolveConstraints();
+    /** Traverses instructions of bb and calls resolveConstraints(Inst *) 
+     * for each inst 
+     */
+    void resolveConstraints(Node* bb);
+    /**
+     Main logic of constraint resolution for each instrution
+     */
+    void resolveConstraintsWithOG(Inst * inst);
+
+    /** returns constraint describing call-safe locations for opnd in CallInst inst */
+    Constraint getCalleeSaveConstraint(Inst * inst, Opnd * opnd);
+    /** returns constraint describing safe locations for operands live at dispatch node entries */
+    Constraint getDispatchEntryConstraint(Opnd * opnd);
+
+    static bool constraintIsWorse(Constraint cnew, Constraint cold, unsigned normedBBExecCount, 
+        unsigned splitThresholdForNoRegs, unsigned splitThresholdFor1Reg, unsigned splitThresholdFor4Regs
+        );
+   
+
+
+    /** Reference to IRManager */ 
+    IRManager&                  irManager;
+
+    /** Private memory manager for this algorithm */ 
+    MemoryManager               memoryManager;
+
+    /** Array of basic blocks to be handled */ 
+    Nodes                       basicBlocks;
+
+    /** result of irManager.getOpndCount before the pass */ 
+    uint32                      originalOpndCount;
+
+    /** Current live set, updated as usual for each instruction in resolveConstraints(Inst*) */ 
+    BitSet                      liveOpnds;
+
+    /** Bit set of operands live at dispatch node entries */ 
+    BitSet                      liveAtDispatchBlockEntry;
+
+    /** Bit set of operands for which original operand should be used wherever possible.
+     *  Currently this is only for operands which are live at dispatch block entries.
+     */ 
+    BitSet                      needsOriginalOpnd;
+
+    /** Temporary bit set of hovering operands (live across call sites)
+     *  Is initialized and used only during resolveConstraints(Inst*)
+     */ 
+    BitSet                      hoveringOpnds;
+
+    /** An array of current substitutions for operands
+     *  Is filled as a result of operand splitting.
+     *  Reset for each basic blocks (all replacements are local within basic blocks)
+     */
+    StlVector<Opnd*>            opndReplaceWorkset;
+
+
+    StlVector<uint32>           opndUsage;
+
+    unsigned callSplitThresholdForNoRegs;
+    unsigned callSplitThresholdFor1Reg;
+    unsigned callSplitThresholdFor4Regs;
+
+    unsigned defSplitThresholdForNoRegs;
+    unsigned defSplitThresholdFor1Reg;
+    unsigned defSplitThresholdFor4Regs;
+
+    unsigned useSplitThresholdForNoRegs;
+    unsigned useSplitThresholdFor1Reg;
+    unsigned useSplitThresholdFor4Regs;
+
+    bool second;
+
+    friend class ConstraintsResolver;
+};
+
+}}; //namespace Ia32
+

Propchange: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.cpp
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.cpp?view=diff&rev=538551&r1=538550&r2=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.cpp (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.cpp Wed May 16 04:58:35 2007
@@ -270,7 +270,7 @@
 }
 
 //_________________________________________________________________________________________________
-Constraint Inst::getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size)
+Constraint Inst::getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size) const
 { 
     Constraint c(OpndKind_Any); 
     if (idx < opndCount){
@@ -336,22 +336,22 @@
     assert(newOpnd != NULL);
     for (uint32 i=0, n=getOpndCount(); i<n; i++){
         uint32 opndRole=getOpndRoles(i);
+        Opnd * opnd=getOpnd(i);
         if (
             (opndRole&opndRoleMask&OpndRole_FromEncoder)!=0 && 
-            (opndRole&opndRoleMask&OpndRole_ForIterator)!=0
+            (opndRole&opndRoleMask&OpndRole_InstLevel)!=0
             ){
-            Opnd * opnd=getOpnd(i);
             if (opnd==oldOpnd){
                 assert((opndRole&OpndRole_Changeable)!=0);
                 setOpnd(i, newOpnd);
                 opnd=newOpnd;
                 replaced = true;
             }
-            if ((opndRoleMask&OpndRole_OpndLevel)!=0)
-                replaced |= opnd->replaceMemOpndSubOpnd(oldOpnd, newOpnd);
         }
+        if ((opndRoleMask&OpndRole_OpndLevel)!=0 &&
+            (opndRoleMask&OpndRole_Use)!=0)
+            replaced |= opnd->replaceMemOpndSubOpnd(oldOpnd, newOpnd);
     }
-
     return replaced;
 }
 

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.h
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.h?view=diff&rev=538551&r1=538550&r2=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.h (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32Inst.h Wed May 16 04:58:35 2007
@@ -658,7 +658,7 @@
      * @param size - if this parameter is not OpndSize_Null the resulted constraint 
      * is adjusted to the requested size.
     */
-    Constraint getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size = OpndSize_Null);
+    Constraint getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size = OpndSize_Null) const;
 
     /** 
      * Returns true if the position at idx starts or extends the operand live range 

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp?view=diff&rev=538551&r1=538550&r2=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp Wed May 16 04:58:35 2007
@@ -1,3 +1,19 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
 /**
  * @author Sergey L. Ivashin
  * @version $Revision$
@@ -14,25 +30,19 @@
 #include <sstream>
 #include <stdio.h>
 
-#ifdef _DEBUG__REGALLOC3
-#include "Ia32RegAllocWrite.h"
+#ifdef _DEBUG_REGALLOC3
 #ifdef _MSC_VER
 #pragma warning(disable : 4505)   //unreferenced local function has been removed
 #endif //#ifdef _MSC_VER
-#endif //#ifdef _DEBUG__REGALLOC3
-
-
-//
-//  Flags to tune 
-//
-#define _VAR2_
-#define _REGALLOC3_NEIGBH
-//#define _REGALLOC3_COALESCE
-
+#endif //#ifdef _DEBUG_REGALLOC3
 
 
 using namespace std;
 
+#ifdef _EM64T_
+#define _SKIP_CATCHED
+#endif
+
 namespace Jitrino
 {
 
@@ -64,7 +74,11 @@
 {
     MemoryManager mm;           // this is private MemoryManager, not irm.getMemoryManager()
 
-    int coalesceCount;
+    unsigned flag_SORT;
+    bool flag_NEIGBH;
+    bool flag_COALESCE;
+
+    unsigned coalesceCount;
 
     class BoolMatrix;
     typedef uint32 RegMask;     // used to represent set of registers
@@ -99,9 +113,10 @@
     };
     typedef StlVector<Oprole> Oproles;
 
+    typedef StlList<int> Indexes;
+
     struct Opndx
     {
-        typedef StlList<int> Indexes;
         Indexes* adjacents,
                * hiddens;
 
@@ -112,27 +127,27 @@
 
         int ridx;       // index in Registers of register assigned/will be assigned
         RegMask alloc,  // 0 or mask of the register assigned
-                avail;  // if not assigned, then mask of the registers available    
-        int nbavails;   // number of the registers available for this operand
-        int spillcost;
-        bool spilled;
+                avail;  // if not assigned, then mask of the registers available (defined by calculated constraint)
+        unsigned nbavails;  // number of the registers available for this operand ( =bitCount(avail) )
+        double  spillcost;
+        bool    spill;  // operand selected for spilling
+        bool    ignore; // this operand was coalesced so its entry in the graph is invalid
     };
 
     //  Operand's graph to be colored
     struct Graph : public StlVector<Opndx>
     {
-        Graph (MemoryManager& m)            : StlVector<Opndx>(m), mm(m) {}
+        Graph (MemoryManager& m)            : StlVector<Opndx>(m) {}
 
         void connect (int x1, int x2) const;
         int  disconnect (int x) const;
         void reconnect  (int x) const;
-        void moveNodes (Opndx::Indexes& from, Opndx::Indexes& to, int x) const;
-
-        MemoryManager& mm;
+        void moveNodes (Indexes& from, Indexes& to, int x) const;
     };
     Graph graph;
 
-    int graphsize;
+    size_t graphsize,  // total size of graph (operands + registers)
+             xregbase;   // index of first register in graph
 
     StlVector<int> nstack;
 
@@ -143,14 +158,24 @@
     uint32 getSideEffects () const  {return coalesceCount == 0 ? 0 : SideEffect_InvalidatesLivenessInfo;}
 
     void runImpl();
-    bool verify(bool force=false);
+    void SpillGen ();
+    bool verify (bool force=false);
 
     bool buildGraph ();
+    void processInst (Inst*, BitSet&, int* opandmap, BoolMatrix&, double excount);
+    void showGraph ();
+    void lookLives (Opnd*, BitSet&, int* opandmap, BoolMatrix&);
+    int  findNode  (Opnd*) const;
     bool coalescing (int* opandmap, BoolMatrix& matrix);
-    void coalesce   (int* opandmap, BoolMatrix& matrix, int, int);
+    void coalesce   (int* opandmap, BoolMatrix& matrix, int x0, int x1);
+    int  duplicates (Indexes* list, BoolMatrix& matrix, int x0, int x1);
     void pruneGraph ();
-    void assignRegs ();
-    bool assignReg (int);
+    bool shouldPrune (const Opndx&) const;
+    bool assignRegs ();
+    bool assignReg  (Opndx&);
+    void spillRegs  ();
+    int  spillReg   (Opndx&);
+    int update (const Inst*, const Opnd*, Constraint&) const;
 };
 
 
@@ -170,12 +195,7 @@
 using std::endl;
 using std::ostream;
 
-#ifdef _DEBUG__REGALLOC3
-
-#include "Ia32RegAllocWrite.h"
-
-static void onConstruct (const IRManager&);
-static void onDestruct  ();
+#ifdef _DEBUG_REGALLOC3
 
 struct Sep
 {
@@ -209,15 +229,7 @@
 
 static ostream& operator << (ostream&, const RegAlloc3::Graph&);
 
-struct Dbgout : public  ::std::ofstream
-{
-    Dbgout (const char* s)          {open(s);}
-    ~Dbgout ()                      {close();}
-};
-
-static Dbgout dbgout("RegAlloc3.txt");
-
-#define DBGOUT(s) dbgout << s
+#define DBGOUT(s) log(LogStream::DBG).out() << s
 
 #else
 
@@ -231,9 +243,9 @@
 //========================================================================================
 
 
-static size_t bitCount (RegAlloc3::RegMask mk)
+static int bitCount (RegAlloc3::RegMask mk)
 {
-    size_t count = 0;
+    int count = 0;
     while (mk != 0)
     {
         if ((mk & 1) != 0)
@@ -244,11 +256,11 @@
 }
 
 
-static size_t bitNumber (RegAlloc3::RegMask mk)
+static int bitNumber (RegAlloc3::RegMask mk)
 {
     assert(mk != 0);
 
-    size_t number = 0;
+    int number = 0;
     while (mk != 1)
     {
         ++number;
@@ -299,6 +311,7 @@
     bool  isw;
 };
 
+
 //========================================================================================
 //  BoolMatrix - Symmetric boolean matrix
 //========================================================================================
@@ -308,7 +321,7 @@
 {
 public:
 
-    BoolMatrix (MemoryManager&, int);
+    BoolMatrix (MemoryManager&, size_t);
 
     void clear ();
     void clear (int i, int j)           {at(i,j) = 0;}
@@ -325,12 +338,12 @@
                        : *(j*dim + i + ptr);
     }
 
-    int   dim, dims;
+    size_t dim, dims;
     char* ptr;
 };
 
 
-RegAlloc3::BoolMatrix::BoolMatrix (MemoryManager& mm, int d)
+RegAlloc3::BoolMatrix::BoolMatrix (MemoryManager& mm, size_t d)
 {
     assert(d > 0);
     dim = d;
@@ -357,6 +370,20 @@
 {
     if (params == 0 || strcmp(params, "ALL") == 0)
     {
+#ifdef _EM64T_
+        push_back(Constraint(RegName_RAX)
+                 |Constraint(RegName_RCX)
+                 |Constraint(RegName_RDX)
+                 |Constraint(RegName_RBX)
+                 |Constraint(RegName_RSI)
+                 |Constraint(RegName_RDI)
+                 |Constraint(RegName_RBP)
+                 |Constraint(RegName_R8)
+                 |Constraint(RegName_R9)
+                 |Constraint(RegName_R10)
+                 |Constraint(RegName_R11)
+                 |Constraint(RegName_R12));
+#else
         push_back(Constraint(RegName_EAX)
                  |Constraint(RegName_ECX)
                  |Constraint(RegName_EDX)
@@ -364,16 +391,15 @@
                  |Constraint(RegName_ESI)
                  |Constraint(RegName_EDI)
                  |Constraint(RegName_EBP));
-
-        push_back(Constraint(RegName_XMM1)
+#endif
+        push_back(Constraint(RegName_XMM0)
+                 |Constraint(RegName_XMM1)
                  |Constraint(RegName_XMM2)
                  |Constraint(RegName_XMM3)
                  |Constraint(RegName_XMM4)
                  |Constraint(RegName_XMM5)
                  |Constraint(RegName_XMM6)
                  |Constraint(RegName_XMM7));
-
-        push_back(Constraint(RegName_FP0));
     }
     else
     {
@@ -391,10 +417,10 @@
 
     assert(!empty());
 
-    for (size_t i = 0; i != IRMaxRegKinds; ++i)
+    for (unsigned i = 0; i != IRMaxRegKinds; ++i)
         indexes[i] = -1;
 
-    for (size_t i = 0; i != size(); ++i)
+    for (unsigned i = 0; i != size(); ++i)
         indexes[operator[](i).getKind()] = (int)i;
 }
 
@@ -403,7 +429,7 @@
 {
     if (c.getMask() != 0)
     {
-        for (size_t i = 0; i != size(); ++i)
+        for (unsigned i = 0; i != size(); ++i)
         {
             Constraint& r = operator[](i);
             if (r.getKind() == c.getKind())
@@ -448,7 +474,7 @@
 
     int disc = 0;
 
-    for (Opndx::Indexes::iterator k = opndx.adjacents->begin(); k != opndx.adjacents->end(); ++k)
+    for (Indexes::iterator k = opndx.adjacents->begin(); k != opndx.adjacents->end(); ++k)
     {
     //  this node is adjacent to the node to be disconnected
         const Opndx& adjopndx = at(*k);
@@ -471,7 +497,7 @@
 //  Node to be reconnected
     const Opndx& opndx = at(x);
 
-    for (Opndx::Indexes::iterator k = opndx.hiddens->begin(); k != opndx.hiddens->end(); ++k)
+    for (Indexes::iterator k = opndx.hiddens->begin(); k != opndx.hiddens->end(); ++k)
     {
     //  this node was adjacent to the node to be reconnected
         const Opndx& adjopndx = at(*k);
@@ -482,9 +508,9 @@
 }
 
 
-void RegAlloc3::Graph::moveNodes (Opndx::Indexes& from, Opndx::Indexes& to, int x) const
+void RegAlloc3::Graph::moveNodes (Indexes& from, Indexes& to, int x) const
 {
-    Opndx::Indexes::iterator i;
+    Indexes::iterator i;
     while ((i = find(from.begin(), from.end(), x)) != from.end())
         to.splice(to.begin(), from, i);
 }
@@ -497,65 +523,41 @@
 
 void RegAlloc3::runImpl ()
 {
-#ifdef _DEBUG__REGALLOC3
-    onConstruct(*irManager);
-#endif
-
-    irManager->fixEdgeProfile();
+    getIRManager().fixEdgeProfile();
 
     registers.parse(getArg("regs"));
     DBGOUT("parameters: " << registers << endl;)
 
+    getArg("SORT", flag_SORT = 2);
+    getArg("NEIGBH", flag_NEIGBH = false);
+    getArg("COALESCE", flag_COALESCE = true);
+
     coalesceCount = 0;
 
+    DBGOUT(endl << "passnb 1" << endl;)
     if (buildGraph())
     {
-
-#ifdef _DEBUG__REGALLOC3
-        dbgout << "--- graph" << endl;
-        for (int x = 0; x != graphsize; ++x)
+        pruneGraph();
+        if (!assignRegs())
         {
-            const Opndx& opndx = graph.at(x);
-            dbgout << "(" << x << ") " << opndx.opnd->getId()
-                   << " " << *opndx.opnd << " ridx:" << opndx.ridx 
-                   << " avail:" << hex << opndx.avail << dec << " nbavails:" << opndx.nbavails
-                   << " spillcost:" << opndx.spillcost;
-
-            if (!opndx.adjacents->empty())
+            bool flag_SPILL;
+            getArg("SPILL", flag_SPILL = false);
+            if (flag_SPILL)
             {
-                Sep s;
-                dbgout << " adjacents{";
-                for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i)
-                    dbgout << s << *i;
-                dbgout << "}";
-            }
+                spillRegs();
+                getIRManager().calculateLivenessInfo();
 
-            if (opndx.neighbs != 0)
-            {
-                Sep s;
-                dbgout << " neighbs{";
-                for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i)
-                    dbgout << s << *i;
-                dbgout << "}";
+                DBGOUT(endl << "passnb 2" << endl;)
+                buildGraph();
+                pruneGraph();
+                assignRegs();
             }
-
-            dbgout << endl;
         }
-        dbgout << "---------" << endl;
-#endif
-
-        pruneGraph();
-        assignRegs();
     }
 
     count_coalesced += coalesceCount;
-
-#ifdef _DEBUG__REGALLOC3
-    RegAllocWrite raw(*irManager, "regalloc3.xml");
-    raw.write();
-
-    onDestruct();
-#endif
+    
+    SpillGen();
 }
 
 
@@ -575,48 +577,69 @@
 }   
 
 
+void RegAlloc3::SpillGen ()   
+{
+/***
+    bool runSpillGen = false;    
+
+    for (unsigned i = 0, opandcount = getIRManager().getOpndCount(); i != opandcount; ++i)
+    {
+        Opnd* opnd  = getIRManager().getOpnd(i);
+        if (opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default).isNull())
+            if (opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default).getKind() == OpndKind_Memory)
+            {
+                opnd->assignMemLocation(MemOpndKind_StackAutoLayout, getIRManager().getRegOpnd(STACK_REG), 0);
+                DBGOUT("assigned to mem " << *opnd << endl;)
+            }
+            else
+                runSpillGen = true;
+    }
+
+    bool* spill_flag = new (getIRManager().getMemoryManager()) bool(runSpillGen);
+    getIRManager().setInfo("SpillGen", spill_flag);
+    DBGOUT("runSpillGen:" << runSpillGen << endl;)
+***/
+}
+
+
 bool RegAlloc3::buildGraph ()   
 {
-    size_t opandcount = getIRManager().getOpndCount();
+    static CountTime buildGraphTimer("ia32::RegAlloc3::buildGraph");
+    AutoTimer tm(buildGraphTimer);
+
+    const unsigned opandcount = getIRManager().getOpndCount();
+    graph.resize(0);
     graph.reserve(opandcount);
 
-//  First, scan all the operands available and see if operand is already assigned
+    Opndx opndx;
+
+//  Scan all the operands available and see if operand is already assigned
 //  or need to be assigned
 
-    Opndx opndx;
     int* opandmap = new (mm) int[opandcount];
 
-    for (size_t i = 0; i != opandcount; ++i)
+    for (unsigned i = 0; i != opandcount; ++i)
     {
-        Opnd* opnd  = getIRManager().getOpnd((uint32)i);
         int mapto = -1;
 
+        Opnd* opnd  = getIRManager().getOpnd(i);
+        opndx.opnd   = opnd;
+        opndx.spill  = false;
+        opndx.ignore = false;
+
         int ridx;
         Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default);
-        if (!loc.isNull())
-        {// this operand is already assigned to some location/register
-            if ((ridx = registers.index(loc)) != -1)
-            {
-                opndx.opnd  = opnd;
-                opndx.ridx  = ridx;
-                opndx.alloc = loc.getMask();
-                mapto = (int)graph.size();
-                graph.push_back(opndx);
-            }
-        }
-        else
+        if (loc.isNull())
         {// this operand is not allocated yet
             loc = opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default);
             if ((ridx = registers.index(loc)) != -1)
             {// operand should be assigned to register
-                opndx.opnd  = opnd;
                 opndx.ridx  = ridx;
                 opndx.alloc = 0;
                 opndx.avail = loc.getMask() & registers[ridx].getMask();
-                opndx.nbavails = (int)bitCount(opndx.avail);
+                opndx.nbavails = bitCount(opndx.avail);
                 assert(opndx.nbavails != 0);
                 opndx.spillcost = 1;
-                opndx.spilled = false;
                 mapto = (int)graph.size();
                 graph.push_back(opndx);
             }
@@ -625,26 +648,50 @@
         opandmap[i] = mapto;
     }
 
-    if ((graphsize = (int)graph.size()) == 0)
+    if ((graphsize = graph.size()) == 0)
         return false;
 
+//  Create graph node for each register available
+
+    xregbase = graph.size();    // graph index of the first register available
+    graph.reserve(graph.size() + registers.size());
+
+    opndx.opnd = 0;
+    opndx.spill  = false;
+    opndx.ignore = false;
+    opndx.ridx  = 0;
+    for (Registers::iterator it = registers.begin(), end = registers.end(); it != end; ++it)
+    {
+        for (RegMask msk = it->getMask(), mk = 1; msk != 0; mk <<= 1)
+            if ((msk & mk) != 0)
+            {
+                msk ^= mk;
+                opndx.alloc = mk;
+                graph.push_back(opndx);
+            }
+
+        ++opndx.ridx;
+    }
+
+    graphsize = graph.size();
+    BoolMatrix matrix(mm, graphsize);
+
     for (Graph::iterator i = graph.begin(); i != graph.end(); ++i)
     {
-        i->adjacents = new (mm) Opndx::Indexes(mm);
-        i->hiddens   = new (mm) Opndx::Indexes(mm);
+        i->adjacents = new (mm) Indexes(mm);
+        i->hiddens   = new (mm) Indexes(mm);
         i->oproles   = new (mm) Oproles(mm);
         i->neighbs   = 0;
     }
 
-//  Second, iterate over all instructions in CFG and calculate which operands
+//  Iterate over all instructions in CFG and calculate which operands
 //  are live simultaneouesly (result stored in matrix)
 
-    BoolMatrix matrix(mm, graphsize);
-
-    BitSet lives(mm, (uint32)opandcount);
+    BitSet lives(mm, opandcount);
 
     const Nodes& nodes = irManager->getFlowGraph()->getNodesPostOrder();
-    for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) {
+    for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) 
+    {
         Node* node = *it;
         if (node->isBlockNode())
         {
@@ -652,11 +699,8 @@
             if (inst == 0)
                 continue;
 
-            int excount = static_cast<int>(node->getExecCount());
-            if (excount < 1)
-                excount = 1;
-            excount *= 100;
-            //int excount = 1;
+            double excount = node->getExecCount() / irManager->getFlowGraph()->getEntryNode()->getExecCount();
+            assert(excount > 0);
 
         //  start with the operands at the block bottom
             getIRManager().getLiveAtExit(node, lives);
@@ -664,58 +708,7 @@
         //  iterate over instructions towards the top of the block
             for (;;)
             {
-                int i, x;
-#ifdef _OLD_
-                Inst::Opnds defs(inst, Inst::OpndRole_AllDefs);
-                for (Inst::Opnds::iterator it = defs.begin(); it != defs.end(); it = defs.next(it))
-                    if ((x = opandmap[i = inst->getOpnd(it)->getId()]) != -1)
-                    {
-                        BitSet::IterB bsk(lives);
-                        int k, y;
-                        for (k = bsk.getNext(); k != -1; k = bsk.getNext())
-                            if (k != i && (y = opandmap[k]) != -1)
-                                matrix.set(x, y);
-                    }
-#else
-                int defx = -1;
-                Oprole oprole;
-                const uint32* oproles = const_cast<const Inst*>(inst)->getOpndRoles();
-                Inst::Opnds opnds(inst, Inst::OpndRole_All);
-                for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it))
-                    if ((x = opandmap[i = inst->getOpnd(it)->getId()]) != -1)
-                    {
-                        Opndx& opndx = graph.at(x);
-
-                        oprole.inst = inst;
-                        oprole.role = oproles[it];
-                        opndx.oproles->push_back(oprole);
-
-                        if (oprole.role & Inst::OpndRole_Def)
-                        {
-                            defx = x;
-                            BitSet::IterB bsk(lives);
-                            int k, y;
-                            for (k = bsk.getNext(); k != -1; k = bsk.getNext())
-                                if (k != i && (y = opandmap[k]) != -1)
-                                    matrix.set(x, y);
-                        }
-#ifdef _REGALLOC3_NEIGBH
-                        else if (defx != -1 && !lives.getBit(opndx.opnd->getId()))
-                        {
-                            Opndx& defopndx = graph.at(defx);
-                            if (defopndx.neighbs == 0)
-                                defopndx.neighbs = new (mm) Opndx::Indexes(mm);
-                            defopndx.neighbs->push_back(x);
-
-                            if (opndx.neighbs ==0)
-                                opndx.neighbs = new (mm) Opndx::Indexes(mm);
-                            opndx.neighbs->push_back(defx);
-                        }
-#endif
-
-                        opndx.spillcost += excount;
-                    }
-#endif
+                processInst(inst, lives, opandmap, matrix, excount);
 
                 if (inst->getPrevInst() == 0)
                     break;
@@ -724,35 +717,198 @@
                 inst = inst->getPrevInst();
             }
         }
+#ifdef _SKIP_CATCHED
+        else if (node->isDispatchNode())
+        {
+            BitSet* tmp = irManager->getLiveAtEntry(node);
+            BitSet::IterB ib(*tmp);
+            int i;
+            for (int x = ib.getNext(); x != -1; x = ib.getNext())
+                if ((i = opandmap[x]) != -1)
+                {
+                    Opndx& opndx = graph.at(i);
+                    opndx.ignore = true;
+                    DBGOUT("catched " << opndx << endl;)
+                }
+        }
+#endif
     }
 
-//  Do iterative coalescing
+//  Detect and ignore not-used operands (e.g. child of coalesced operands)
 
-#ifdef _REGALLOC3_COALESCE
-        while (coalescing(opandmap, matrix))
-        /*nothing*/;
-#endif
+    for (unsigned x = 0; x < xregbase; ++x)
+        if (graph[x].oproles->empty())
+            graph[x].ignore = true;
 
-//  Third, connect nodes that represent simultaneouesly live operands
+//  Connect nodes that represent simultaneouesly live operands
 
-    for (int x1 = 1; x1 < graphsize; ++x1)
-        for (int x2 = 0; x2 < x1; ++x2)
-            if (matrix.test(x1, x2) && graph.at(x1).ridx == graph.at(x2).ridx)
+    for (unsigned x1 = 1; x1 < graphsize; ++x1)
+        for (unsigned x2 = 0; x2 < x1; ++x2)
+            if (matrix.test(x1, x2))
                 graph.connect(x1, x2);
 
+    showGraph();
+
+//  Do iterative coalescing
+
+    if (flag_COALESCE)
+        while (coalescing(opandmap, matrix))
+            /*nothing*/;
+
+    showGraph();
     return true;
 }
 
 
+void RegAlloc3::processInst (Inst* inst, BitSet& lives, int* opandmap, BoolMatrix& matrix, double excount)
+{
+    int defx = -1;
+    Oprole oprole;
+    Inst::Opnds opnds(inst, Inst::OpndRole_All);
+    for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it))
+    {
+        uint32 role = inst->getOpndRoles(it);
+        Opnd*  opnd = inst->getOpnd(it);
+
+    //  For each operand def, look at the all live operand
+        if (role & Inst::OpndRole_Def)
+            lookLives(opnd, lives, opandmap, matrix);
+
+        int i = opnd->getId();
+        int x = opandmap[i];
+        if (x != -1)
+        {
+            Opndx& opndx = graph.at(x);
+
+            if (opndx.oproles->empty() || opndx.oproles->back().inst != inst)
+            {
+                oprole.inst = inst;
+                oprole.role = role;
+                opndx.oproles->push_back(oprole);
+            }
+            else
+                opndx.oproles->back().role |= role;
+
+            if (role & Inst::OpndRole_Def)
+            {
+                defx = x;
+            }
+            else if (flag_NEIGBH && defx != -1 && !lives.getBit(i))
+            {
+                Opndx& defopndx = graph.at(defx);
+                if (defopndx.neighbs == 0)
+                    defopndx.neighbs = new (mm) Indexes(mm);
+                defopndx.neighbs->push_back(x);
+
+                if (opndx.neighbs == 0)
+                    opndx.neighbs = new (mm) Indexes(mm);
+                opndx.neighbs->push_back(defx);
+            }
+
+            opndx.spillcost += excount;
+        }
+    }
+}
+
+
+//  Look for operands live at defining point 
+//
+void RegAlloc3::lookLives (Opnd* opnd, BitSet& lives, int* opandmap, BoolMatrix& matrix)
+{
+    int i = opnd->getId();
+    int x;
+    if ((x = opandmap[i]) == -1 && (x = findNode(opnd)) == -1)
+        return;
+
+    BitSet::IterB bsk(lives);
+    int k, y;
+    for (k = bsk.getNext(); k != -1; k = bsk.getNext())
+        if (k != i)
+            if ((y = opandmap[k]) != -1 || (y = findNode(irManager->getOpnd(k))) != -1)
+                matrix.set(x, y);
+}
+
+
+int RegAlloc3::findNode (Opnd* opnd) const
+{
+    Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location);
+    if (!loc.isNull())
+    {
+        int ridx = registers.index(loc);
+        RegMask msk = loc.getMask();
+
+        for (size_t x = xregbase; x != graph.size(); ++x)
+        {
+            const Opndx& opndx = graph[x];
+            assert(opndx.alloc != 0);
+            if (opndx.ridx == ridx && opndx.alloc == msk)
+                return (int)x;
+        }
+    }
+
+    return -1;
+}
+
+
+void RegAlloc3::showGraph ()
+{
+#ifdef _DEBUG_REGALLOC3
+    log(LogStream::DBG) << "--- graph" << endl;
+    for (unsigned x = 0; x != graphsize; ++x)
+    {
+        const Opndx& opndx = graph.at(x);
+        log(LogStream::DBG).out()
+            << "(" << x << ") "
+            << (opndx.ignore ? "IGNORE " : "");
+
+            if (opndx.opnd != 0)
+                log(LogStream::DBG).out() << *opndx.opnd;
+            else
+                log(LogStream::DBG).out() << "REG";
+
+        log(LogStream::DBG).out()
+            << " ridx:" << opndx.ridx 
+            << " avail:" << hex << opndx.avail << " alloc:" << opndx.alloc << dec 
+            //<< " nbavails:" << opndx.nbavails
+            << " spillcost:" << opndx.spillcost;
+
+        if (!opndx.adjacents->empty())
+        {
+            Sep s;
+            log(LogStream::DBG) << " adjacents{";
+            for (RegAlloc3::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i)
+                log(LogStream::DBG).out() << s << *i;
+            log(LogStream::DBG) << "}";
+        }
+
+        if (opndx.neighbs != 0)
+        {
+            Sep s;
+            log(LogStream::DBG) << " neighbs{";
+            for (RegAlloc3::Indexes::const_iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i)
+                log(LogStream::DBG).out() << s << *i;
+            log(LogStream::DBG) << "}";
+        }
+
+        log(LogStream::DBG) << endl;
+    }
+    log(LogStream::DBG) << "---------" << endl;
+#endif
+}
+
+
 bool RegAlloc3::coalescing (int* opandmap, BoolMatrix& matrix)
 {
+    //static CountTime coalescingTimer("ia32::RegAlloc3::coalescing");
+    //AutoTimer tm(coalescingTimer);
+
     int x0, x1;
 
     const Nodes& nodes = irManager->getFlowGraph()->getNodesPostOrder();
-    for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) {
+    for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) 
+    {
         Node* node = *it;
         if (node->isBlockNode())
-        {
             for (const Inst*  inst = (Inst*)node->getLastInst(); inst != 0; inst = inst->getPrevInst())
                 if (inst->getMnemonic() == Mnemonic_MOV)
                     if ((x0 = opandmap[inst->getOpnd(0)->getId()]) != -1 &&
@@ -762,13 +918,58 @@
                         Opndx& opndx0 = graph.at(x0),
                              & opndx1 = graph.at(x1);
 
-                        if (opndx0.ridx  == opndx1.ridx && opndx0.avail == opndx1.avail)
+                        RegMask  avail = opndx0.avail & opndx1.avail;
+                        unsigned nbavails = bitCount(avail);
+
+                        if (opndx0.ridx != opndx1.ridx || nbavails == 0)
+                            continue;
+
+                        //if (opndx0.opnd->getSize() != opndx1.opnd->getSize())
+                        //    continue;
+
+                        Type* t0 = opndx0.opnd->getType(),
+                            * t1 = opndx1.opnd->getType();
+
+                        //if ((t0->isManagedPtr() || t0->isObject()) != (t1->isManagedPtr() || t1->isObject()))
+                        //    continue;
+
+                        if (!Type::mayAlias(&irManager->getTypeManager(), t0, t1))
+                            continue;
+
+                        DBGOUT("coalesce candidates (" << x0 << ") & (" << x1 << ") " << *inst << endl;)
+
+                        size_t   xdegree = 0, // estimated degree of the coalesced node
+                                 xcount  = 0; // number of neighbours with degree >= k
+
+                        xdegree = opndx0.adjacents->size() + opndx1.adjacents->size() 
+                                  - duplicates(opndx1.adjacents, matrix, x0, x1);
+
+                        for (Indexes::iterator ptr = opndx0.adjacents->begin(), end = opndx0.adjacents->end(); ptr != end; ++ptr)
                         {
-                            coalesce (opandmap, matrix, x0, x1);
-                            return true;
+                            Indexes* ixs = graph.at(*ptr).adjacents;
+                            size_t ndegree = ixs->size() - duplicates(ixs, matrix, x0, x1);
+                            if (ndegree >= nbavails)
+                                if (++xcount >= nbavails)
+                                    break;
                         }
+                        for (Indexes::iterator ptr = opndx1.adjacents->begin(), end = opndx1.adjacents->end(); ptr != end; ++ptr)
+                            if (!matrix.test(*ptr, x0))
+                            {
+                                Indexes* ixs = graph.at(*ptr).adjacents;
+                                size_t ndegree = ixs->size();
+                                if (ndegree >= nbavails)
+                                    if (++xcount >= nbavails)
+                                        break;
+                            }
+
+                        DBGOUT("xdegree:" << xdegree << " xcount:" << xcount << endl;)
+
+                        if (xcount >= nbavails || xdegree >= nbavails)
+                            continue;
+
+                        coalesce (opandmap, matrix, x0, x1);
+                        return true;
                     }
-        }
     }
 
     return false;
@@ -777,7 +978,7 @@
 
 //  Colalesce graph nodes (x0) and (x1) and the corresponding operands.
 //  Node (x1) not to be used anymore, (x0) must be used unstead.
-//  Note that (x1) remains in the graph
+//  Note that (x1) remains in the graph (must be ignored)
 //
 void RegAlloc3::coalesce (int* opandmap, BoolMatrix& matrix, int x0, int x1)
 {
@@ -786,7 +987,10 @@
     Opndx& opndx0 = graph.at(x0),
          & opndx1 = graph.at(x1);
 
-    opndx1.spilled = true;
+    opndx0.avail &= opndx1.avail;
+    opndx0.nbavails = bitCount(opndx0.avail);
+
+    opndx1.ignore = true;
 
     Opnd* opnd0 = opndx0.opnd,
         * opnd1 = opndx1.opnd;
@@ -802,137 +1006,168 @@
     opndx0.oproles->insert(opndx0.oproles->end(), opndx1.oproles->begin(), opndx1.oproles->end());
     opndx1.oproles->clear();
 
-    for (int x = 0; x < graphsize; ++x)
-        if (x != x1 && matrix.test(x, x1))
-        {
+    Indexes tmp(mm);    // list of trash indexes
+
+    for (Indexes::iterator ptr = opndx1.adjacents->begin(), end = opndx1.adjacents->end(); ptr != end;)
+    {
+        Indexes::iterator ptr_next = ptr;
+        ++ptr_next;
+
+        int x = *ptr;
+        assert(matrix.test(x, x1));
+        matrix.clear(x, x1);
+
+        Opndx& opndx = graph.at(x);
+        Indexes::iterator ptrx = find(opndx.adjacents->begin(), opndx.adjacents->end(), x1);
+        assert(ptrx != opndx.adjacents->end());
+
+        if (matrix.test(x, x0))
+        {// disconnect (x1 - x)
+            tmp.splice(tmp.end(), *opndx1.adjacents, ptr);
+            tmp.splice(tmp.end(), *opndx.adjacents, ptrx);
+        }
+        else
+        {// connect (x0 - x)
             matrix.set(x, x0);
-            matrix.clear(x, x1);
+            opndx0.adjacents->splice(opndx0.adjacents->end(), *opndx1.adjacents, ptr);  // x0 -> x
+            *ptrx = x0;                                                                 // x  -> x0
         }
 
+        ptr = ptr_next;
+    }
+
+    assert(opndx1.adjacents->empty());
+
     ++coalesceCount;
 }
 
 
-#ifndef _VAR0
+int RegAlloc3::duplicates (RegAlloc3::Indexes* list, RegAlloc3::BoolMatrix& matrix, int x0, int x1)
+{
+    int count = 0;
+
+    for (RegAlloc3::Indexes::iterator ptr = list->begin(), end = list->end(); ptr != end; ++ptr)
+        if (*ptr != x0 && *ptr != x1)
+            if (matrix.test(*ptr, x0) && matrix.test(*ptr, x1))
+                ++count;
+
+    return count;
+}
+
 
 struct sortRule1
 {
     const RegAlloc3::Graph& graph;
+    const unsigned int rule;
 
-
-    sortRule1 (const RegAlloc3::Graph& g)   :graph(g) {}
-
+    sortRule1 (const RegAlloc3::Graph& g, unsigned int r)   :graph(g), rule(r) {}
 
     bool operator () (int x1, int x2)
     {
         const RegAlloc3::Opndx& opndx1 = graph.at(x1),
                               & opndx2 = graph.at(x2);
 
-#ifdef _VAR1_
-        return opndx1.spillcost > opndx2.spillcost;
-#endif
-#ifdef _VAR2_
-        return opndx1.spillcost < opndx2.spillcost;
-#endif
-
+        return rule == 1 ? opndx1.spillcost > opndx2.spillcost
+                         : opndx1.spillcost < opndx2.spillcost;
     }
 };
 
-#endif
-
 
 void RegAlloc3::pruneGraph ()
 {
-    DBGOUT(endl << "pruneGraph - start"<< endl;)
+    static CountTime pruneGraphTimer("ia32::RegAlloc3::pruneGraph");
+    AutoTimer tm(pruneGraphTimer);
+
+    DBGOUT(endl << "pruneGraph"<< endl;)
 
-    size_t nbnodes = 0;
-    for (int i = 0; i != graphsize; ++i)
-        if (!graph.at(i).adjacents->empty())
+//  Calculate number of nodes that should be pruned off the graph
+    int nbnodes = 0;
+    for (unsigned i = 0; i != graphsize; ++i)
+        if (shouldPrune(graph.at(i)))
             nbnodes++;
 
     StlVector<int> tmp(mm);
 
     nstack.reserve(nbnodes);
-    while (nbnodes != 0)
+    while (nbnodes > 0)
     {
     //  Apply degree < R rule
 
-#ifdef _VAR0_
+        if (flag_SORT == 0)
 
-        for (bool succ = false; !succ;)
-        {
-            succ = true;
-            for (int i = 0; i != graphsize; ++i)
+            for (bool succ = false; !succ;)
             {
-                Opndx& opndx = graph.at(i);
-                const int n = (int)opndx.adjacents->size();
-                if (n != 0 && n < opndx.nbavails)
+                succ = true;
+                for (unsigned i = 0; i != graphsize; ++i)
                 {
-                    nbnodes -= graph.disconnect(i);
-                    nstack.push_back(i);
-                    succ = false;
-
-                    DBGOUT(" rule#1 (" << i << ")" << endl;)
+                    Opndx& opndx = graph.at(i);
+                    if (shouldPrune(opndx))
+                    {
+                        const size_t n = opndx.adjacents->size();
+                        if (n != 0 && n < opndx.nbavails)
+                        {
+                            nbnodes -= graph.disconnect(i);
+                            nstack.push_back(i);
+                            succ = false;
+                            //DBGOUT(" rule#1 (" << i << ")" << endl;)
+                        }
+                    }
                 }
             }
 
-        }
-
-#else
-
-        for (bool succ = false; !succ;)
-        {
-            succ = true;
-
-            tmp.resize(0);
+        else
 
-            for (int i = 0; i != graphsize; ++i)
+            for (bool succ = false; !succ;)
             {
-                Opndx& opndx = graph.at(i);
-                const int n = (int)opndx.adjacents->size();
-                if (n != 0 && n < opndx.nbavails)
-                    tmp.push_back(i);
-            }
+                succ = true;
 
-            if (tmp.size() != 0)
-            {
-                DBGOUT("buck size:" << tmp.size() << endl;)
-                if (tmp.size() > 1)
-                    sort(tmp.begin(), tmp.end(), sortRule1(graph));
+                tmp.resize(0);
 
-                for (StlVector<int>::iterator it = tmp.begin(); it != tmp.end(); ++it)
+                for (unsigned i = 0; i != graphsize; ++i)
                 {
-                    nbnodes -= graph.disconnect(*it);
-                    nstack.push_back(*it);
-
-                    DBGOUT(" rule#1 (" << *it << ")  cost:" << graph.at(*it).spillcost << endl;)
+                    Opndx& opndx = graph.at(i);
+                    if (shouldPrune(opndx))
+                    {
+                        const size_t n = opndx.adjacents->size();
+                        if (n != 0 && n < opndx.nbavails)
+                            tmp.push_back(i);
+                    }
                 }
 
-                succ = false;
-            }
-        }
+                if (tmp.size() != 0)
+                {
+                    if (tmp.size() > 1)
+                        sort(tmp.begin(), tmp.end(), sortRule1(graph, flag_SORT));
 
-#endif
+                    for (StlVector<int>::iterator it = tmp.begin(); it != tmp.end(); ++it)
+                    {
+                        nbnodes -= graph.disconnect(*it);
+                        nstack.push_back(*it);
+                    }
+
+                    succ = false;
+                }
+            }
 
     //  Apply degree >= R rule
 
-        if (nbnodes != 0)
+        if (nbnodes > 0)
         {
             int x = -1, n;
             double cost = 0, w;
 
         //  Find some node to disconnect 
-            for (int i = 0; i != graphsize; ++i)
+            for (unsigned i = 0; i != graphsize; ++i)
             {
                 Opndx& opndx = graph.at(i);
-                if ((n = (int)opndx.adjacents->size()) != 0)
-                {
-                    w = (double)opndx.spillcost/(double)n;
-                    DBGOUT("    (" << i << ") cost:" << w << endl;)
-                    if (x == -1 || w < cost)
-                        cost = w,
-                        x    = i;
-                }
+                if (shouldPrune(opndx))
+                    if ((n = (int)opndx.adjacents->size()) != 0)
+                    {
+                        w = opndx.spillcost/(double)n;
+                        if (x == -1 || w < cost)
+                            cost = w,
+                            x    = i;
+                    }
             }
 
             assert(x != -1);
@@ -940,18 +1175,27 @@
             {
                 nbnodes -= graph.disconnect(x);
                 nstack.push_back(x);
-
-                DBGOUT(" rule#2 (" << x << ") cost:" << cost << endl;)
             }
         }
     }
+}
+
 
-    DBGOUT("pruneGraph - stop"<< endl << graph;)
+bool RegAlloc3::shouldPrune (const Opndx& opndx) const
+{
+    return !opndx.ignore && opndx.alloc == 0 && !opndx.adjacents->empty();
 }
 
 
-void RegAlloc3::assignRegs ()
+bool RegAlloc3::assignRegs ()
 {
+    static CountTime assignRegsTimer("ia32::RegAlloc3::assignRegs");
+    AutoTimer tm(assignRegsTimer);
+
+    DBGOUT("assignRegs" << endl;)
+
+    int spilled = 0;
+
     while (!nstack.empty())
     {
         int x = nstack.back();
@@ -959,30 +1203,47 @@
 
         Opndx& opndx = graph.at(x);
         graph.reconnect(x);
-        opndx.spilled = !assignReg(x);
+        if (opndx.alloc == 0)
+        {
+            DBGOUT("(" << x << ")" << endl;)
+            opndx.spill = !assignReg(opndx);
+        }
     }
 
-    for (int x = 0; x != graphsize; ++x)
+    for (unsigned x = 0; x != graphsize; ++x)
     {
         Opndx& opndx = graph.at(x);
-        if (opndx.alloc == 0 && !opndx.spilled )
-            assignReg(x);
+        if (opndx.alloc == 0 && !opndx.spill && !opndx.ignore)
+        {
+            DBGOUT("(" << x << ")" << endl;)
+            opndx.spill = !assignReg(opndx);
+        }
+
+        if (opndx.spill)
+            ++spilled;
     }
+
+    DBGOUT("spilled " << spilled << " operands" << endl;)
+
+    return spilled == 0;
 }
 
 
-bool RegAlloc3::assignReg (int x)
+bool RegAlloc3::assignReg (Opndx& opndx)
 {
-    Opndx& opndx = graph.at(x);
     RegMask alloc = 0;
 
-    for (Opndx::Indexes::iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i)
-        alloc |= graph.at(*i).alloc;
+    assert(!opndx.ignore);
+    for (Indexes::iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i)
+    {
+        Opndx& opndz = graph.at(*i);
+        if (opndz.ridx == opndx.ridx)
+            alloc |= opndz.alloc;
+    }
 
     if ((alloc = opndx.avail & ~alloc) == 0)
     {
-        ++count_spilled;
-        DBGOUT("spilled (" << x << ")" << endl;)
+        DBGOUT("  assign " << *opndx.opnd << " failed" << endl;)
         return false;
     }
     else
@@ -990,7 +1251,7 @@
         if (opndx.neighbs != 0)
         {
             RegMask neighbs = 0;
-            for (Opndx::Indexes::iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i)
+            for (Indexes::iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i)
             {
                 Opndx& neigbx = graph.at(*i);
                 if (neigbx.ridx == opndx.ridx)
@@ -999,7 +1260,7 @@
 
             if ((neighbs & alloc) != 0 && neighbs != alloc)
             {
-                DBGOUT("! alloc:" << std::hex << alloc << " * neighbs:"  << neighbs << " =" << (alloc & neighbs) << std::dec << endl);
+                DBGOUT("  !alloc:" << std::hex << alloc << " * neighbs:"  << neighbs << " =" << (alloc & neighbs) << std::dec << endl);
                 alloc &= neighbs;
             }
         }
@@ -1007,41 +1268,109 @@
         opndx.alloc = findHighest(alloc);
         opndx.opnd->assignRegName(getRegName((OpndKind)registers[opndx.ridx].getKind(), 
                                              opndx.opnd->getSize(), 
-                                             (int)bitNumber(opndx.alloc)));
+                                             bitNumber(opndx.alloc)));
 
         ++count_assigned;
-        DBGOUT("assigned (" << x << ") = <" << getRegNameString(opndx.opnd->getRegName()) << ">" << endl;)
+        DBGOUT("  assigned " << *opndx.opnd << endl;)
         return true;
     }
 }
 
 
-//========================================================================================
-// Internal debug helpers
-//========================================================================================
+void RegAlloc3::spillRegs ()
+{
+    DBGOUT("spillRegs" << endl;)
 
+    int inserted = 0;
+
+    for (unsigned x = 0; x != graphsize; ++x)
+    {
+        Opndx& opndx = graph.at(x);
+        if (opndx.spill)
+            inserted += spillReg(opndx);
+    }
+
+    DBGOUT("inserted " << inserted << " operands" << endl;)
+}
 
-#ifdef _DEBUG__REGALLOC3
 
-static void onConstruct (const IRManager& irm)
+int RegAlloc3::spillReg (Opndx& opndx)
 {
-    MethodDesc& md = irm.getMethodDesc();
-    const char * methodName = md.getName();
-    const char * methodTypeName = md.getParentType()->getName();
-    dbgout << endl << "Constructed " << methodTypeName  << "." << methodName 
-           << "(" << md.getSignatureString() << ")" << endl;
+    Opnd* opnd = opndx.opnd;
+    const Constraint initial = opnd->getConstraint(Opnd::ConstraintKind_Initial);
+
+    if ((initial.getKind() & OpndKind_Memory) == 0)
+    {
+        DBGOUT("  spilling " << *opndx.opnd << " failed" << endl;)
+        return 0;
+    }
+
+    DBGOUT("  spilling " << *opndx.opnd << endl;)
+    opnd->setCalculatedConstraint(initial);
+    opnd->assignMemLocation(MemOpndKind_StackAutoLayout, irManager->getRegOpnd(STACK_REG), 0);
+
+    int inserted = 0;
+
+    for (Oproles::iterator ptr = opndx.oproles->begin(), end = opndx.oproles->end(); ptr != end; ++ptr)
+    {
+        Opnd* opndnew = getIRManager().newOpnd(opnd->getType(), initial);
+        Inst* inst = ptr->inst;
+        Inst* instnew = 0;
+        bool replaced = false;
+        if (ptr->role & Inst::OpndRole_Use)
+        {
+            instnew = getIRManager().newCopyPseudoInst(Mnemonic_MOV, opndnew, opnd);
+            instnew->insertBefore(inst);
+            replaced = inst->replaceOpnd(opnd, opndnew);
+            assert(replaced);
+            DBGOUT("    before " << *inst << " inserted " << *instnew << " MOV " << *opndnew << ", " << *opnd << endl;)
+        }
+
+        if (ptr->role & Inst::OpndRole_Def)
+        {
+            assert(!inst->hasKind(Inst::Kind_LocalControlTransferInst));  
+            if (!replaced)
+                replaced = inst->replaceOpnd(opnd, opndnew);
+            assert(replaced);
+            instnew = getIRManager().newCopyPseudoInst(Mnemonic_MOV, opnd, opndnew);
+            instnew->insertAfter(inst);
+            DBGOUT("    after  " << *inst << " inserted " << *instnew << " MOV " << *opnd << ", " << *opndnew << endl;)
+        }
+
+        Constraint c = initial;
+        update(instnew, opndnew, c);
+        update(inst,    opndnew, c);
+        opndnew->setCalculatedConstraint(c);
 
-//  sos = strcmp(methodTypeName, "spec/benchmarks/_213_javac/BatchEnvironment") == 0 &&
-//        strcmp(methodName,     "flushErrors") == 0;
+        ++inserted;
+    }
+
+    ++count_spilled;
+    return inserted;
 }
 
 
-static void onDestruct ()
+//  If currently handled operand is referenced by current instruction, then evaluate
+//  constraint of the operand imposed by this instruction and return 'true'.
+//  Otherwise, do nothing and return false.
+//
+int RegAlloc3::update (const Inst* inst, const Opnd* opnd, Constraint& constr) const
 {
-    dbgout << endl << "Destructed" << endl;
-}
+    int count = 0;
+    Inst::Opnds opnds(inst, Inst::OpndRole_All);
+    for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it))
+        if ( inst->getOpnd(it) == opnd)
+        {
+            Constraint c = inst->getConstraint(it, 0, constr.getSize());
+            if (constr.isNull())
+                constr = c;
+            else
+                constr.intersectWith(c);
 
-#endif //#ifdef _DEBUG__REGALLOC3
+            count++;    
+        }
+    return count;
+}
 
 
 //========================================================================================
@@ -1049,7 +1378,7 @@
 //========================================================================================
 
 
-#ifdef _DEBUG__REGALLOC3
+#ifdef _DEBUG_REGALLOC3
 
 static ostream& operator << (ostream& os, Sep& x)
 {
@@ -1105,11 +1434,11 @@
 {
     Sep s;;
     os << "{";
-    for (size_t rk = 0; rk != registers.size(); ++rk)
+    for (unsigned rk = 0; rk != registers.size(); ++rk)
     {
         RegAlloc3::RegMask msk = x[rk];
 
-        for (size_t rx = 0; msk != 0; ++rx, msk >>= 1)
+        for (unsigned rx = 0; msk != 0; ++rx, msk >>= 1)
             if ((msk & 1) != 0)
             {
                 RegName reg = getRegName((OpndKind)registers[rk].getKind(), registers[rk].getSize(), rx);
@@ -1130,7 +1459,7 @@
     {
         Sep s;
         os << " adjacents{";
-        for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i)
+        for (RegAlloc3::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i)
             os << s << *i;
         os << "}";
     }
@@ -1139,7 +1468,7 @@
     {
         Sep s;
         os << " hiddens{";
-        for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.hiddens->begin(); i != opndx.hiddens->end(); ++i)
+        for (RegAlloc3::Indexes::const_iterator i = opndx.hiddens->begin(); i != opndx.hiddens->end(); ++i)
             os << s << *i;
         os << "}";
     }
@@ -1158,7 +1487,7 @@
 }
 
 
-#endif //#ifdef _DEBUG__REGALLOC3
+#endif //#ifdef _DEBUG_REGALLOC3
 
 } //namespace Ia32
 } //namespace Jitrino

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp?view=diff&rev=538551&r1=538550&r2=538551
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp Wed May 16 04:58:35 2007
@@ -193,7 +193,6 @@
 
     bool   evicts_known;
 
-
 //  Methods
 //  -------
 
@@ -579,6 +578,13 @@
     onConstruct(*this);
 #endif
 
+    const bool* spill_flag = static_cast<const bool*>(getIRManager().getInfo("SpillGen"));
+    if (spill_flag != 0 && !*spill_flag)
+    {
+        DBGOUT("SpillGen skipped" << endl;);
+        return;
+    }
+
     registers.parse(getArg("regs"));
     DBGOUT("parameters: " << registers << endl;)
     irManager->resetOpndConstraints();
@@ -652,6 +658,7 @@
 
     static Counter<size_t> count_emits("ia32:spillgen:emits", 0);
     count_emits += emitted;
+
 #ifdef _DEBUG_SPILLGEN
     log(LogStream::DBG) << endl << "Emitted movs :" << emitted << endl;
 
@@ -869,7 +876,6 @@
 
     assert(instxp == instxs.begin());
     return actives.size();
-
 }
 
 
@@ -896,11 +902,9 @@
 
     lives_catch = 0;
     Node* node = bblock->getExceptionEdgeTarget();
-    if (node!=NULL) {
-        if ((lives_catch = irManager->getLiveAtEntry(node))->isEmpty()) {
+    if (node != NULL) 
+        if ((lives_catch = irManager->getLiveAtEntry(node))->isEmpty()) 
             lives_catch = 0;
-        }
-    }
 
     for (Oplines::reverse_iterator it = actives.rbegin(); it != actives.rend(); ++it)
     {
@@ -935,12 +939,13 @@
 
     //  Process instructions that are using the operand
 
-        while (opline.go()) {
+        while (opline.go()) 
             if (opline.isProc())
             {
                 Constraint c(opline.opnd->getConstraint(Opnd::ConstraintKind_Initial));
                 update(opline.instx->inst, opline.opnd, c);
                 opline.idx = registers.getIndex(c);
+
                 if (!tryRegister(opline, c, prefreg))
                     if (!tryMemory(opline, c))
                         if (!tryEvict(opline, c))
@@ -950,7 +955,10 @@
                                 ++fails;
 
                                 if (simplify(opline.instx->inst, opline.opnd))
+                                {
+                                    loadOpndMem(opline);
                                     break;
+                                }
 
                                 loadOpnd(opline, opline.instx, opline.opnd);
 
@@ -966,11 +974,10 @@
             {
                 opline.forw();
             }
-        }
 
     //  End-block processing
 
-        assert(opline.instx == opline.ops->front().instx);
+        //assert(opline.instx == opline.ops->front().instx);
         if (opline.at_exit)
             loadOpndMem(opline);
     }
@@ -1012,6 +1019,7 @@
     Constraint cr((OpndKind)cx.getKind(), c.getSize(), cx.getMask());
 
 //  handle first instruction of the interval
+
     RegMask mk = cr.getMask() & ~usedRegs(opline.instx, opline.idx, opline.isUse());
     if (mk == 0)
     {
@@ -1025,7 +1033,6 @@
 
 //  handle second and all others instructions
 
-
     Instx* begx = opline.instx;
     Instx* endx = begx;
     int count = 0;
@@ -1059,7 +1066,8 @@
         mkpost = callRegs(opline.instx, opline.idx);
         count += cnt;
     }
-    DBGOUT("   -reg [" << (const Inst*)begx->inst << " - " << (const Inst*)endx->inst 
+
+    DBGOUT("   -reg [" << *begx->inst << " - " << *endx->inst 
         << "] avail:" << RegMasks(cx, mk) << " count:" << count << endl;)
 
     if (count == 0)
@@ -1073,6 +1081,7 @@
             return true;
         }
     }
+
     if ((mk & prefreg) != 0)
     {
         mk &= prefreg;
@@ -1241,14 +1250,34 @@
 
 bool SpillGen::simplify (Inst* inst, Opnd* opnd)
 {
+    Opnd* opndm;
     if (inst->hasKind(Inst::Kind_LocalControlTransferInst) && inst->getOpndCount() == 1)
     {
-        Opnd* opndm = inst->getOpnd(0);
+        opndm = inst->getOpnd(0);
         Opnd* opndx = irManager->newMemOpnd(opndm->getType(), 
                                            MemOpndKind_StackAutoLayout, 
-                                           irManager->getRegOpnd(RegName_ESP), 
+                                           irManager->getRegOpnd(STACK_REG), 
                                            0);
-        DBGOUT("simplify " << *opndm << " -> mem " << *opndx << endl;);
+        DBGOUT("simplify jump " << *opndm << " -> mem " << *opndx << endl;);
+        emitMove(true, inst,opndx, opndm);
+        inst->replaceOpnd(opndm, opndx);
+
+        for (int so = 0; so != MemOpndSubOpndKind_Count; ++so)
+        {
+            Opnd* sub = opndm->getMemOpndSubOpnd(static_cast<MemOpndSubOpndKind>(so));
+            if (sub != 0)
+                actsmap[sub->getId()] = 0;
+        }
+        return true;
+    }
+
+    else if (inst->hasKind(Inst::Kind_CallInst) && (opndm = inst->getOpnd(0)) == opnd)
+    {
+        Opnd* opndx = irManager->newMemOpnd(opndm->getType(), 
+                                           MemOpndKind_StackAutoLayout, 
+                                           irManager->getRegOpnd(STACK_REG), 
+                                           0);
+        DBGOUT("simplify call " << *opndm << " -> mem " << *opndx << endl;);
         emitMove(true, inst,opndx, opndm);
         inst->replaceOpnd(opndm, opndx);
 
@@ -1384,6 +1413,7 @@
            << " - " << *endx->inst << "] to " << *opline.opnd_mem << endl;)
 
     loadOpnd(opline, begx, opline.opnd_mem);
+
     if (opline.opnd != opline.opnd_mem)
         for (; begx <= endx; ++begx)
         {
@@ -1842,6 +1872,7 @@
 } //namespace Ia32
 
 } //namespace Jitrino
+
 
 
 



Mime
View raw message