harmony-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Egor Pasko <egor.pa...@gmail.com>
Subject Re: [drlvm] class unloading: vtable marks benchmarking
Date Fri, 24 Nov 2006 05:02:52 GMT
Salikh,

cool! :)

As I remember from the discussions, a number of unconditional writes may show a
different picture on multiprocessor machines because of massive cache
invalidations between processors. Is it easy to get similar statistics
for multiprocessor machines?

On the 0x22A day of Apache Harmony Salikh Zakirov wrote:
> Hi,
> 
> As a result of numerous class unloading discussions,
> I've hacked the vtable marking proposals into GC_CC directly, and measured
> their impact on the performance. I've attached the two patches
> corresponding to "vtable marks" and "indirect marks".
> 
> Benchmark: dacapo-2006-10 hsqldb
> Machine: IBM Thinkpad T41p, Pentium M 1700 MHz (1 core), 1 Gb
> Windows XP SP2, MSVC 7.0, release build
> Benchmark arguments:
> 
>   java -verbose:gc -jar c:/work/dacapo/dacapo-2006-10.jar -s default -n 3 hsqldb
> 
> Benchmarks results:
> 
> no vtable marks:	===== DaCapo hsqldb PASSED in 6168 msec =====
> vtable marks:		===== DaCapo hsqldb PASSED in 6218 msec =====
> (0.8% slowdown)
> indirect marks:		===== DaCapo hsqldb PASSED in 6409 msec =====
> (3.9% slowdown)
> 
> Garbage collection times:
> (garbage collection times were collected for the whole dacapo run,
> including warmup benchmark runs).
> 
> no vtable marks:
> COMPACT avg  614.375 +/- 117.537 =  4915.000 / 8, min   50.000, max 911.000
> COPY    avg  255.000 +/- 39.325 =  2040.000 / 8, min   90.000, max  490.000
> FORCED  avg  189.333 +/- 7.589 =  2840.000 / 15, min  140.000, max  240.000
> 
> vtable marks:
> COMPACT avg  615.500 +/- 119.544 =  4924.000 / 8, min   40.000, max  931.000
> COPY    avg  260.000 +/- 27.839 =  2340.000 / 9, min  160.000, max  460.000
> FORCED  avg  186.667 +/- 7.411 =  2800.000 / 15, min  140.000, max  240.000
> 
> indirect marks:
> COMPACT avg  619.375 +/- 123.104 =  4955.000 / 8, min   30.000, max  941.000
> COPY    avg  265.000 +/- 38.868 =  2120.000 / 8, min  110.000, max  500.000
> FORCED  avg  194.000 +/- 8.095 =  2910.000 / 15, min  150.000, max  250.000
> 
> Summary: as was predicted, adding an unconditional write to object scanning
> does not have much impact on the garbage collection time. However,
> the overall impact is visible at the benchmark level.
> 
> Regarding the false sharing when writing vtable marks,
> the benchmarking should be run on a multiprocessor machine and with a
> parallel GC.
> diff --git vm/gc_cc/src/collect_copy.cpp vm/gc_cc/src/collect_copy.cpp
> index a3b6a96..a4663fc 100644
> --- vm/gc_cc/src/collect_copy.cpp
> +++ vm/gc_cc/src/collect_copy.cpp
> @@ -168,6 +168,7 @@ static bool gc_copy_process_reference(Sl
>      // move the object?
>  #define pos ((unsigned char*) obj)
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt);
> +    vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>  
>      if (pos >= heap.compaction_region_start() && pos < heap.compaction_region_end()) {
> diff --git vm/gc_cc/src/collect_forced.cpp vm/gc_cc/src/collect_forced.cpp
> index 072f21e..92bf167 100644
> --- vm/gc_cc/src/collect_forced.cpp
> +++ vm/gc_cc/src/collect_forced.cpp
> @@ -64,6 +64,7 @@ static void forced_process_reference(Par
>      obj->obj_info() = (info & ~MARK_BITS) | heap_mark_phase;
>  
>      Partial_Reveal_VTable *vtable = obj->vtable();
> +    vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>  
>      if (gcvt->is_array()) { // is array
> diff --git vm/gc_cc/src/collect_slide_compact.cpp vm/gc_cc/src/collect_slide_compact.cpp
> index e5b4f54..985b94e 100644
> --- vm/gc_cc/src/collect_slide_compact.cpp
> +++ vm/gc_cc/src/collect_slide_compact.cpp
> @@ -454,6 +454,7 @@ static void slide_process_object(Partial
>      assert(obj->vt() & ~RESCAN_BIT); // has vt
>  
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt & ~RESCAN_BIT);
> +    vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>  
>      // process slots
> diff --git vm/gc_cc/src/gc_types.h vm/gc_cc/src/gc_types.h
> index 1ac4236..849aaf0 100644
> --- vm/gc_cc/src/gc_types.h
> +++ vm/gc_cc/src/gc_types.h
> @@ -152,6 +152,9 @@ typedef struct Partial_Reveal_VTable {
>  private:
>      GC_VTable_Info *gcvt;
>  public:
> +    /// the class reachability mark,
> +    /// used for class unloading
> +    size_t mark;
>  
>      void set_gcvt(struct GC_VTable_Info *new_gcvt) { gcvt = new_gcvt; }
>      struct GC_VTable_Info *get_gcvt() { return gcvt; }
> diff --git vm/vmcore/include/vtable.h vm/vmcore/include/vtable.h
> index a1fc8b4..eb08687 100644
> --- vm/vmcore/include/vtable.h
> +++ vm/vmcore/include/vtable.h
> @@ -53,6 +53,7 @@ typedef struct Intfc_Table {
>  
>  typedef struct VTable {
>      Byte _gc_private_information[GC_BYTES_IN_VTABLE];
> +    size_t mark;
>      Class* clss;
>  
>      // See the masks in vm_for_gc.h.
> diff --git vm/gc_cc/src/collect_copy.cpp vm/gc_cc/src/collect_copy.cpp
> index a3b6a96..c2caac2 100644
> --- vm/gc_cc/src/collect_copy.cpp
> +++ vm/gc_cc/src/collect_copy.cpp
> @@ -168,6 +168,7 @@ static bool gc_copy_process_reference(Sl
>      // move the object?
>  #define pos ((unsigned char*) obj)
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt);
> +    *vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>  
>      if (pos >= heap.compaction_region_start() && pos < heap.compaction_region_end()) {
> diff --git vm/gc_cc/src/collect_forced.cpp vm/gc_cc/src/collect_forced.cpp
> index 072f21e..7e4de43 100644
> --- vm/gc_cc/src/collect_forced.cpp
> +++ vm/gc_cc/src/collect_forced.cpp
> @@ -64,6 +64,7 @@ static void forced_process_reference(Par
>      obj->obj_info() = (info & ~MARK_BITS) | heap_mark_phase;
>  
>      Partial_Reveal_VTable *vtable = obj->vtable();
> +    *vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>  
>      if (gcvt->is_array()) { // is array
> diff --git vm/gc_cc/src/collect_slide_compact.cpp vm/gc_cc/src/collect_slide_compact.cpp
> index e5b4f54..4a3ee9c 100644
> --- vm/gc_cc/src/collect_slide_compact.cpp
> +++ vm/gc_cc/src/collect_slide_compact.cpp
> @@ -454,6 +454,7 @@ static void slide_process_object(Partial
>      assert(obj->vt() & ~RESCAN_BIT); // has vt
>  
>      Partial_Reveal_VTable *vtable = ah_to_vtable(vt & ~RESCAN_BIT);
> +    *vtable->mark = 1;
>      GC_VTable_Info *gcvt = vtable->get_gcvt();
>  
>      // process slots
> diff --git vm/gc_cc/src/gc_types.h vm/gc_cc/src/gc_types.h
> index 1ac4236..da9a48c 100644
> --- vm/gc_cc/src/gc_types.h
> +++ vm/gc_cc/src/gc_types.h
> @@ -152,6 +152,9 @@ typedef struct Partial_Reveal_VTable {
>  private:
>      GC_VTable_Info *gcvt;
>  public:
> +    /// pointer to the class reachability mark, 
> +    /// used for class unloading
> +    size_t *mark;
>  
>      void set_gcvt(struct GC_VTable_Info *new_gcvt) { gcvt = new_gcvt; }
>      struct GC_VTable_Info *get_gcvt() { return gcvt; }
> diff --git vm/vmcore/include/Class.h vm/vmcore/include/Class.h
> index 7194edb..a6c198c 100644
> --- vm/vmcore/include/Class.h
> +++ vm/vmcore/include/Class.h
> @@ -772,6 +772,8 @@ enum AccessAndPropertiesFlags {
>   * calling the verifier, preparing, resolving and initializing the class.*/
>  
>  struct Class {
> +    /// mark used for the class unloading
> +    size_t mark;
>  private:
>      typedef struct {
>          union {

-- 
Egor Pasko


Mime
View raw message