tvm-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tqc...@apache.org
Subject [incubator-tvm-site] branch asf-site updated: Docs build at Mon Apr 20 12:15:54 PDT 2020
Date Mon, 20 Apr 2020 19:16:05 GMT
This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 4123191  Docs build at Mon Apr 20 12:15:54 PDT 2020
4123191 is described below

commit 4123191830867396aef81eb5c19546150b39b07f
Author: tqchen <tqchen@octoml.ai>
AuthorDate: Mon Apr 20 12:15:54 2020 -0700

    Docs build at Mon Apr 20 12:15:54 PDT 2020
---
 .../low_level_custom_pass.py                       |   12 +-
 .../low_level_custom_pass.ipynb                    |   26 +-
 docs/_sources/api/python/topi.rst.txt              |    2 +
 docs/_sources/langref/relay_op.rst.txt             |    1 +
 .../tutorials/autotvm/sg_execution_times.rst.txt   |   16 +-
 .../tutorials/autotvm/tune_conv2d_cuda.rst.txt     |   42 +-
 .../tutorials/autotvm/tune_simple_template.rst.txt |   20 +-
 .../tutorials/dev/low_level_custom_pass.rst.txt    |   48 +-
 .../tutorials/dev/sg_execution_times.rst.txt       |    6 +-
 .../frontend/deploy_model_on_android.rst.txt       |    2 +-
 .../tutorials/frontend/deploy_prequantized.rst.txt |   11 +-
 .../tutorials/frontend/deploy_ssd_gluoncv.rst.txt  |   78 +-
 docs/_sources/tutorials/frontend/from_onnx.rst.txt |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |   34 +-
 docs/_sources/tutorials/language/reduction.rst.txt |  101 +-
 docs/_sources/tutorials/language/scan.rst.txt      |   69 +-
 .../tutorials/language/schedule_primitives.rst.txt |  133 +-
 .../tutorials/language/sg_execution_times.rst.txt  |   18 +-
 docs/_sources/tutorials/language/tensorize.rst.txt |   48 +-
 .../tutorials/language/tuple_inputs.rst.txt        |   51 +-
 .../tutorials/optimize/opt_conv_cuda.rst.txt       |    2 +-
 .../tutorials/optimize/opt_conv_tensorcore.rst.txt |  218 +-
 docs/_sources/tutorials/optimize/opt_gemm.rst.txt  |  227 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |   10 +-
 docs/_sources/tutorials/relay_quick_start.rst.txt  |    2 +-
 docs/_sources/tutorials/sg_execution_times.rst.txt |    8 +-
 docs/_sources/tutorials/topi/intro_topi.rst.txt    |  533 +--
 .../tutorials/topi/sg_execution_times.rst.txt      |    4 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../vta/tutorials/autotvm/tune_relay_vta.rst.txt   |   14 +-
 .../frontend/deploy_classification.rst.txt         |    4 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../_sources/vta/tutorials/matrix_multiply.rst.txt |  159 +-
 .../vta/tutorials/optimize/convolution_opt.rst.txt |  385 +-
 .../tutorials/optimize/matrix_multiply_opt.rst.txt |  263 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../vta/tutorials/sg_execution_times.rst.txt       |    6 +-
 .../_sources/vta/tutorials/vta_get_started.rst.txt |   70 +-
 docs/api/python/driver.html                        |    2 +-
 docs/api/python/ir.html                            |    9 +-
 docs/api/python/relay/analysis.html                |   25 +-
 docs/api/python/relay/frontend.html                |  134 +-
 docs/api/python/relay/index.html                   |  139 +-
 docs/api/python/relay/nn.html                      |   87 +-
 docs/api/python/relay/transform.html               |   83 +-
 docs/api/python/te.html                            |   24 +-
 docs/api/python/tir.html                           |  424 +-
 docs/api/python/topi.html                          |   52 +-
 docs/doxygen/analyzer_8h_source.html               |    2 +-
 docs/doxygen/annotated.html                        |  338 +-
 docs/doxygen/bias__add_8h_source.html              |    2 +-
 docs/doxygen/bound_8h.html                         |    6 +-
 docs/doxygen/bound_8h_source.html                  |    5 +-
 docs/doxygen/broadcast_8h.html                     |    8 +
 docs/doxygen/broadcast_8h_source.html              |   45 +-
 docs/doxygen/buffer_8h_source.html                 |    2 +-
 docs/doxygen/c__runtime__api_8h_source.html        |    2 +-
 docs/doxygen/classes.html                          |  239 +-
 docs/doxygen/classtvm_1_1BaseAttrsNode.html        |   10 +-
 .../doxygen/classtvm_1_1tir_1_1BufferLoadNode.html |    2 +-
 ... classtvm_1_1tir_1_1BufferRealize-members.html} |   26 +-
 ....html => classtvm_1_1tir_1_1BufferRealize.html} |   56 +-
 ...sstvm_1_1tir_1_1BufferRealizeNode-members.html} |   27 +-
 ...l => classtvm_1_1tir_1_1BufferRealizeNode.html} |  200 +-
 ...vm_1_1tir_1_1BufferRealizeNode__coll__graph.svg |  203 +
 ..._1tir_1_1BufferRealizeNode__inherit__graph.svg} |   81 +-
 ...asstvm_1_1tir_1_1BufferRealize__coll__graph.svg |   56 +
 ...tvm_1_1tir_1_1BufferRealize__inherit__graph.svg |   56 +
 docs/doxygen/classtvm_1_1tir_1_1BufferStore.html   |    8 +-
 .../classtvm_1_1tir_1_1BufferStoreNode.html        |    8 +-
 docs/doxygen/classtvm_1_1tir_1_1CallNode.html      |    3 +
 ...ml => classtvm_1_1tir_1_1Prefetch-members.html} |   26 +-
 ...Store.html => classtvm_1_1tir_1_1Prefetch.html} |   54 +-
 .../classtvm_1_1tir_1_1PrefetchNode-members.html   |   11 +-
 docs/doxygen/classtvm_1_1tir_1_1PrefetchNode.html  |  116 +-
 ...lasstvm_1_1tir_1_1PrefetchNode__coll__graph.svg |  192 +-
 ...stvm_1_1tir_1_1PrefetchNode__inherit__graph.svg |   75 +-
 .../classtvm_1_1tir_1_1Prefetch__coll__graph.svg   |   56 +
 ...classtvm_1_1tir_1_1Prefetch__inherit__graph.svg |   56 +
 docs/doxygen/classtvm_1_1tir_1_1ProvideNode.html   |    1 +
 docs/doxygen/classtvm_1_1tir_1_1RealizeNode.html   |    1 +
 docs/doxygen/classtvm_1_1tir_1_1Stmt.html          |    2 +-
 ...classtvm_1_1tir_1_1StmtExprMutator-members.html |   15 +-
 .../classtvm_1_1tir_1_1StmtExprMutator.html        |    2 +
 ...stvm_1_1tir_1_1StmtExprMutator__coll__graph.svg |    2 +-
 ...m_1_1tir_1_1StmtExprMutator__inherit__graph.svg |    2 +-
 ...classtvm_1_1tir_1_1StmtExprVisitor-members.html |   15 +-
 .../classtvm_1_1tir_1_1StmtExprVisitor.html        |    6 +-
 ...stvm_1_1tir_1_1StmtExprVisitor__coll__graph.svg |   86 +-
 ...m_1_1tir_1_1StmtExprVisitor__inherit__graph.svg |   86 +-
 ...tmt_01_6n_00_01Args_8_8_8args_08_4-members.html |   19 +-
 ...onst_01Stmt_01_6n_00_01Args_8_8_8args_08_4.html |   38 +
 ...01_6n_00_01Args_8_8_8args_08_4__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1StmtMutator-members.html    |   15 +-
 docs/doxygen/classtvm_1_1tir_1_1StmtMutator.html   |   26 +
 ...classtvm_1_1tir_1_1StmtMutator__coll__graph.svg |    2 +-
 ...sstvm_1_1tir_1_1StmtMutator__inherit__graph.svg |    2 +-
 docs/doxygen/classtvm_1_1tir_1_1StmtNode.html      |    2 +-
 ...classtvm_1_1tir_1_1StmtNode__inherit__graph.svg |  411 +-
 .../classtvm_1_1tir_1_1StmtVisitor-members.html    |   15 +-
 docs/doxygen/classtvm_1_1tir_1_1StmtVisitor.html   |   30 +-
 ...classtvm_1_1tir_1_1StmtVisitor__coll__graph.svg |   44 +-
 ...sstvm_1_1tir_1_1StmtVisitor__inherit__graph.svg |   48 +-
 .../classtvm_1_1tir_1_1Stmt__inherit__graph.svg    |  124 +-
 docs/doxygen/codegen_8h_source.html                |    2 +-
 docs/doxygen/cuda_2dense_8h.html                   |    2 +-
 docs/doxygen/cuda_2dense_8h__incl.svg              | 1632 ++++----
 docs/doxygen/cuda_2injective_8h.html               |    2 +-
 docs/doxygen/cuda_2injective_8h__incl.svg          | 1734 ++++-----
 docs/doxygen/cuda_2normalization_8h.html           |    2 +-
 docs/doxygen/cuda_2normalization_8h__incl.svg      | 1759 +++++----
 docs/doxygen/cuda_2pooling_8h.html                 |    2 +-
 docs/doxygen/cuda_2pooling_8h__incl.svg            | 1705 ++++----
 docs/doxygen/cuda_2reduction_8h.html               |    2 +-
 docs/doxygen/cuda_2reduction_8h__incl.svg          | 1734 ++++-----
 docs/doxygen/cuda_2softmax_8h.html                 |    2 +-
 docs/doxygen/cuda_2softmax_8h__incl.svg            | 1734 ++++-----
 docs/doxygen/data__type_8h__dep__incl.svg          |    2 +-
 docs/doxygen/detail_2extern_8h_source.html         |    6 +-
 docs/doxygen/dir_000019_000010.html                |    2 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad_dep.svg   |    4 +-
 .../dir_72c2f11201cd7636dc7624de0754daa5_dep.svg   |    4 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |    4 +-
 .../dir_f97d855a3173728370e632aa77170e34_dep.svg   |    4 +-
 docs/doxygen/driver__api_8h.html                   |    2 +-
 docs/doxygen/driver__api_8h__incl.svg              | 1468 ++++---
 docs/doxygen/elemwise_8h_source.html               |    6 +-
 docs/doxygen/env__func_8h_source.html              |    2 +-
 docs/doxygen/feature_8h_source.html                |    2 +-
 docs/doxygen/functions__.html                      |    1 +
 docs/doxygen/functions_a.html                      |    1 +
 docs/doxygen/functions_b.html                      |   14 +-
 docs/doxygen/functions_c.html                      |    6 +-
 docs/doxygen/functions_d.html                      |    1 -
 docs/doxygen/functions_e.html                      |    3 +-
 docs/doxygen/functions_f.html                      |    3 +-
 docs/doxygen/functions_func_b.html                 |    8 +-
 docs/doxygen/functions_func_m.html                 |    1 -
 docs/doxygen/functions_func_p.html                 |    6 +
 docs/doxygen/functions_func_s.html                 |    2 +
 docs/doxygen/functions_func_t.html                 |   12 +-
 docs/doxygen/functions_func_v.html                 |   23 +-
 docs/doxygen/functions_m.html                      |    3 +-
 docs/doxygen/functions_n.html                      |    3 +
 docs/doxygen/functions_p.html                      |    6 +
 docs/doxygen/functions_s.html                      |    9 +-
 docs/doxygen/functions_t.html                      |   12 +-
 docs/doxygen/functions_u.html                      |    2 +-
 docs/doxygen/functions_v.html                      |   24 +-
 docs/doxygen/functions_vars.html                   |    1 +
 docs/doxygen/functions_vars_a.html                 |    1 +
 docs/doxygen/functions_vars_b.html                 |    6 +-
 docs/doxygen/functions_vars_c.html                 |    2 +
 docs/doxygen/functions_vars_d.html                 |    1 -
 docs/doxygen/functions_vars_e.html                 |    1 +
 docs/doxygen/functions_vars_f.html                 |    1 -
 docs/doxygen/functions_vars_n.html                 |    3 +
 docs/doxygen/functions_vars_s.html                 |    1 +
 docs/doxygen/functions_vars_v.html                 |    1 -
 docs/doxygen/functor_8h__dep__incl.svg             |    2 +-
 docs/doxygen/generic_2default_8h.html              |    2 +-
 docs/doxygen/generic_2default_8h__incl.svg         | 1734 ++++-----
 docs/doxygen/generic_2extern_8h.html               |    2 +-
 docs/doxygen/generic_2extern_8h__incl.svg          | 1731 +++++----
 docs/doxygen/generic_2injective_8h.html            |    2 +-
 docs/doxygen/generic_2injective_8h__incl.svg       | 1734 ++++-----
 docs/doxygen/hierarchy.html                        |  478 +--
 .../include_2tvm_2ir_2transform_8h_source.html     |    2 +-
 .../doxygen/include_2tvm_2relay_2attrs_2nn_8h.html |    3 +
 .../include_2tvm_2relay_2attrs_2nn_8h_source.html  |   67 +-
 .../include_2tvm_2relay_2transform_8h_source.html  |    2 +-
 docs/doxygen/include_2tvm_2tir_2transform_8h.html  |   45 +
 .../include_2tvm_2tir_2transform_8h_source.html    |   20 +-
 docs/doxygen/inherit_graph_10.svg                  | 1737 +++++----
 docs/doxygen/inherit_graph_5.svg                   | 4054 ++++++++++----------
 docs/doxygen/inherits.html                         |    4 +-
 docs/doxygen/int__solver_8h_source.html            |    2 +-
 docs/doxygen/interpreter_8h_source.html            |    2 +-
 docs/doxygen/ir_2adt_8h_source.html                |    2 +-
 docs/doxygen/ir_2attrs_8h.html                     |    2 +-
 docs/doxygen/ir_2attrs_8h__dep__incl.svg           |  816 ++--
 docs/doxygen/ir_2attrs_8h_source.html              |    2 +-
 docs/doxygen/ir_2expr_8h_source.html               |    2 +-
 docs/doxygen/ir_2function_8h.html                  |    2 +-
 docs/doxygen/ir_2function_8h__dep__incl.svg        |  846 ++--
 docs/doxygen/ir_2module_8h_source.html             |    2 +-
 docs/doxygen/ir_2op_8h_source.html                 |    2 +-
 docs/doxygen/ir_2type_8h_source.html               |    2 +-
 docs/doxygen/ir__pass_8h.html                      |   54 -
 docs/doxygen/ir__pass_8h_source.html               |   20 +-
 docs/doxygen/local__response__norm_8h_source.html  |    2 +-
 docs/doxygen/namespacemembers_c.html               |    2 +-
 docs/doxygen/namespacemembers_d.html               |    2 +-
 docs/doxygen/namespacemembers_func_c.html          |    2 +-
 docs/doxygen/namespacemembers_func_d.html          |    2 +-
 docs/doxygen/namespacemembers_func_i.html          |   10 +-
 docs/doxygen/namespacemembers_func_l.html          |   12 +-
 docs/doxygen/namespacemembers_func_n.html          |    5 +-
 docs/doxygen/namespacemembers_func_r.html          |    9 +-
 docs/doxygen/namespacemembers_func_s.html          |   18 +-
 docs/doxygen/namespacemembers_func_u.html          |    2 +-
 docs/doxygen/namespacemembers_func_v.html          |    2 +-
 docs/doxygen/namespacemembers_i.html               |   10 +-
 docs/doxygen/namespacemembers_l.html               |   12 +-
 docs/doxygen/namespacemembers_n.html               |    5 +-
 docs/doxygen/namespacemembers_r.html               |    9 +-
 docs/doxygen/namespacemembers_s.html               |   16 +-
 docs/doxygen/namespacemembers_u.html               |    2 +-
 docs/doxygen/namespacemembers_v.html               |    2 +-
 docs/doxygen/namespacetopi.html                    |  806 ++--
 docs/doxygen/namespacetvm_1_1arith.html            |   24 +-
 docs/doxygen/namespacetvm_1_1relay.html            |    3 +
 docs/doxygen/namespacetvm_1_1te.html               |  228 +-
 docs/doxygen/namespacetvm_1_1tir.html              |  655 +---
 docs/doxygen/namespacetvm_1_1tir_1_1intrinsic.html |    5 +-
 docs/doxygen/namespacetvm_1_1tir_1_1transform.html |  411 ++
 docs/doxygen/node_2container_8h__dep__incl.svg     |    2 +-
 docs/doxygen/node_8h__dep__incl.svg                |    2 +-
 docs/doxygen/reflection_8h_source.html             |    2 +-
 docs/doxygen/relay_2adt_8h_source.html             |    2 +-
 docs/doxygen/relay_2analysis_8h_source.html        |    2 +-
 docs/doxygen/relay_2expr_8h_source.html            |    2 +-
 docs/doxygen/relay_2function_8h_source.html        |    2 +-
 docs/doxygen/relay_2type_8h_source.html            |    2 +-
 docs/doxygen/repr__printer_8h__dep__incl.svg       |    2 +-
 docs/doxygen/rocm_2dense_8h__incl.svg              |    2 +-
 docs/doxygen/rocm_2injective_8h.html               |    2 +-
 docs/doxygen/rocm_2injective_8h__incl.svg          | 1766 ++++-----
 docs/doxygen/rocm_2pooling_8h.html                 |    2 +-
 docs/doxygen/rocm_2pooling_8h__incl.svg            | 1789 +++++----
 docs/doxygen/rocm_2reduction_8h.html               |    2 +-
 docs/doxygen/rocm_2reduction_8h__incl.svg          | 1766 ++++-----
 docs/doxygen/rocm_2softmax_8h.html                 |    2 +-
 docs/doxygen/rocm_2softmax_8h__incl.svg            | 1766 ++++-----
 docs/doxygen/runtime_2container_8h.html            |    2 +-
 docs/doxygen/runtime_2container_8h__dep__incl.svg  | 1164 +++---
 docs/doxygen/runtime_2memory_8h_source.html        |    2 +-
 docs/doxygen/schedule__pass_8h.html                |   21 +-
 docs/doxygen/schedule__pass_8h__incl.svg           | 1903 ++++-----
 docs/doxygen/schedule__pass_8h_source.html         |    5 +-
 docs/doxygen/search/all_1.js                       |    2 +-
 docs/doxygen/search/all_10.js                      |    2 +-
 docs/doxygen/search/all_11.js                      |    6 +-
 docs/doxygen/search/all_13.js                      |    5 +-
 docs/doxygen/search/all_14.js                      |   23 +-
 docs/doxygen/search/all_15.js                      |   10 +-
 docs/doxygen/search/all_16.js                      |    4 +-
 docs/doxygen/search/all_17.js                      |   14 +-
 docs/doxygen/search/all_2.js                       |   16 +-
 docs/doxygen/search/all_3.js                       |   14 +-
 docs/doxygen/search/all_4.js                       |    8 +-
 docs/doxygen/search/all_5.js                       |    4 +-
 docs/doxygen/search/all_6.js                       |    2 +-
 docs/doxygen/search/all_7.js                       |    2 +-
 docs/doxygen/search/all_8.js                       |    9 +-
 docs/doxygen/search/all_a.js                       |   10 +-
 docs/doxygen/search/all_d.js                       |    6 +-
 docs/doxygen/search/all_e.js                       |    2 +-
 docs/doxygen/search/all_f.js                       |    3 +-
 docs/doxygen/search/classes_0.js                   |    2 +
 docs/doxygen/search/classes_1.js                   |    2 +
 docs/doxygen/search/classes_13.js                  |    6 +-
 docs/doxygen/search/classes_6.js                   |    1 +
 docs/doxygen/search/classes_d.js                   |    1 +
 docs/doxygen/search/functions_10.js                |    3 +-
 docs/doxygen/search/functions_11.js                |    5 +-
 docs/doxygen/search/functions_12.js                |   15 +-
 docs/doxygen/search/functions_13.js                |    6 +-
 docs/doxygen/search/functions_14.js                |    2 +-
 docs/doxygen/search/functions_15.js                |    6 +-
 docs/doxygen/search/functions_2.js                 |    2 +
 docs/doxygen/search/functions_3.js                 |    2 +-
 docs/doxygen/search/functions_4.js                 |    2 +-
 docs/doxygen/search/functions_9.js                 |   10 +-
 docs/doxygen/search/functions_c.js                 |    6 +-
 docs/doxygen/search/functions_d.js                 |    2 +-
 docs/doxygen/search/functions_e.js                 |    2 +-
 docs/doxygen/search/functions_f.js                 |    2 +-
 docs/doxygen/search/variables_0.js                 |    2 +-
 docs/doxygen/search/variables_1.js                 |    2 +-
 docs/doxygen/search/variables_11.js                |    2 +-
 docs/doxygen/search/variables_14.js                |    2 +-
 docs/doxygen/search/variables_2.js                 |    6 +-
 docs/doxygen/search/variables_3.js                 |    4 +-
 docs/doxygen/search/variables_4.js                 |    2 +-
 docs/doxygen/search/variables_5.js                 |    2 +-
 docs/doxygen/search/variables_6.js                 |    2 +-
 docs/doxygen/search/variables_d.js                 |    1 +
 docs/doxygen/span_8h_source.html                   |    2 +-
 docs/doxygen/stmt_8h.html                          |   15 +-
 docs/doxygen/stmt_8h__dep__incl.svg                |  973 ++---
 docs/doxygen/stmt_8h_source.html                   |  275 +-
 docs/doxygen/stmt__functor_8h_source.html          |   45 +-
 ...ructtvm_1_1relay_1_1GroupNormAttrs-members.html |  126 +
 .../structtvm_1_1relay_1_1GroupNormAttrs.html      |  287 ++
 ...tvm_1_1relay_1_1GroupNormAttrs__coll__graph.svg |   91 +
 ..._1_1relay_1_1GroupNormAttrs__inherit__graph.svg |   91 +
 docs/doxygen/tensor__type_8h_source.html           |    2 +-
 docs/doxygen/tir_2analysis_8h_source.html          |    4 +-
 docs/doxygen/tir_2expr_8h__dep__incl.svg           |    2 +-
 docs/doxygen/tir_2expr_8h_source.html              |  184 +-
 docs/doxygen/tir_2expr__functor_8h_source.html     |    4 +-
 docs/doxygen/tir_2function_8h.html                 |    2 +-
 docs/doxygen/tir_2function_8h__dep__incl.svg       |  651 +++-
 docs/doxygen/tir_2function_8h_source.html          |    2 +-
 docs/doxygen/tir_2op_8h_source.html                |    4 +-
 docs/doxygen/type__relation_8h_source.html         |    2 +-
 docs/doxygen/var_8h__dep__incl.svg                 |    2 +-
 docs/doxygen/var_8h_source.html                    |    2 +-
 docs/doxygen/x86_2default_8h.html                  |    2 +-
 docs/doxygen/x86_2default_8h__incl.svg             | 1736 ++++-----
 docs/genindex.html                                 |   70 +-
 docs/langref/relay_op.html                         |   37 +-
 docs/objects.inv                                   |  Bin 13811 -> 13939 bytes
 docs/searchindex.js                                |    2 +-
 docs/tutorials/autotvm/sg_execution_times.html     |   14 +-
 docs/tutorials/autotvm/tune_conv2d_cuda.html       |   42 +-
 docs/tutorials/autotvm/tune_simple_template.html   |   20 +-
 docs/tutorials/dev/low_level_custom_pass.html      |   32 +-
 docs/tutorials/dev/sg_execution_times.html         |    6 +-
 .../frontend/deploy_model_on_android.html          |    2 +-
 docs/tutorials/frontend/deploy_prequantized.html   |   13 +-
 docs/tutorials/frontend/deploy_ssd_gluoncv.html    |   80 +-
 docs/tutorials/frontend/from_onnx.html             |    6 +-
 docs/tutorials/frontend/sg_execution_times.html    |   34 +-
 docs/tutorials/language/reduction.html             |   96 +-
 docs/tutorials/language/scan.html                  |   66 +-
 docs/tutorials/language/schedule_primitives.html   |  122 +-
 docs/tutorials/language/sg_execution_times.html    |   18 +-
 docs/tutorials/language/tensorize.html             |   44 +-
 docs/tutorials/language/tuple_inputs.html          |   48 +-
 docs/tutorials/optimize/opt_conv_cuda.html         |    2 +-
 docs/tutorials/optimize/opt_conv_tensorcore.html   |  216 +-
 docs/tutorials/optimize/opt_gemm.html              |  220 +-
 docs/tutorials/optimize/sg_execution_times.html    |   10 +-
 docs/tutorials/relay_quick_start.html              |  102 +-
 docs/tutorials/sg_execution_times.html             |    8 +-
 docs/tutorials/topi/intro_topi.html                |  528 +--
 docs/tutorials/topi/sg_execution_times.html        |    4 +-
 docs/vta/tutorials/autotvm/sg_execution_times.html |    4 +-
 docs/vta/tutorials/autotvm/tune_relay_vta.html     |  196 +-
 .../tutorials/frontend/deploy_classification.html  |   18 +-
 docs/vta/tutorials/frontend/deploy_detection.html  |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 docs/vta/tutorials/matrix_multiply.html            |  156 +-
 docs/vta/tutorials/optimize/convolution_opt.html   |  382 +-
 .../tutorials/optimize/matrix_multiply_opt.html    |  260 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/vta/tutorials/sg_execution_times.html         |    6 +-
 docs/vta/tutorials/vta_get_started.html            |   68 +-
 351 files changed, 27247 insertions(+), 24855 deletions(-)

diff --git a/docs/_downloads/9cf0213876be0a9cc4aaa52a1ebd9586/low_level_custom_pass.py b/docs/_downloads/9cf0213876be0a9cc4aaa52a1ebd9586/low_level_custom_pass.py
index 25ca279..d35913b 100644
--- a/docs/_downloads/9cf0213876be0a9cc4aaa52a1ebd9586/low_level_custom_pass.py
+++ b/docs/_downloads/9cf0213876be0a9cc4aaa52a1ebd9586/low_level_custom_pass.py
@@ -40,8 +40,6 @@ Before reading this tutorial, we assume readers have already known these topics
   take a look at ``python/tvm/build_module.py`` to get some basics.
 
 """
-
-from __future__ import absolute_import, print_function
 import tvm
 from tvm import te
 import numpy as np
@@ -57,7 +55,7 @@ b = te.placeholder((n, ), name="b")
 c = te.compute((n, ), lambda i: a[i] + b[i], name='c')
 
 sch = te.create_schedule(c.op)
-ir  = tvm.lower(sch, [a, b, c], simple_mode=True)
+ir  = tvm.lower(sch, [a, b, c])
 print(ir)
 
 ######################################################################
@@ -137,12 +135,8 @@ def vectorize(stmt):
 # Glue to Lowering
 # ----------------
 # So far, we are done with writing this IR transformation pass. What we need to do next is to glue
-# this pass to TVM's lower pass. We can first call this function directly as a sanity check.
+# this pass to TVM's lower pass.
 #
-
-print(vectorize(ir))
-
-#####################################################################
 # In TVM, there is a property called ``BuildConfig``. You can use this property to customize your
 # own lowering options. In this case, we inject the pass written above into the TVM standard lowering
 # pass by feeding **a list of tuple** as argument to ``add_lower_pass``. "Tuple" indicates different
@@ -160,7 +154,7 @@ print(vectorize(ir))
 #
 
 with tvm.target.build_config(add_lower_pass=[(1, vectorize)]) as cfg:
-    print(tvm.lower(sch, [a, b, c], simple_mode=True))
+    print(tvm.lower(sch, [a, b, c]))
 
 #####################################################################
 # Quick View
diff --git a/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb b/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb
index cfbd59b..f86cc6c 100644
--- a/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb
+++ b/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\nimport tvm\nfrom tvm import te\nimport numpy as np"
+        "import tvm\nfrom tvm import te\nimport numpy as np"
       ]
     },
     {
@@ -44,7 +44,7 @@
       },
       "outputs": [],
       "source": [
-        "n = tvm.tir.const(128, \"int32\")\na = te.placeholder((n, ), name=\"a\")\nb = te.placeholder((n, ), name=\"b\")\nc = te.compute((n, ), lambda i: a[i] + b[i], name='c')\n\nsch = te.create_schedule(c.op)\nir  = tvm.lower(sch, [a, b, c], simple_mode=True)\nprint(ir)"
+        "n = tvm.tir.const(128, \"int32\")\na = te.placeholder((n, ), name=\"a\")\nb = te.placeholder((n, ), name=\"b\")\nc = te.compute((n, ), lambda i: a[i] + b[i], name='c')\n\nsch = te.create_schedule(c.op)\nir  = tvm.lower(sch, [a, b, c])\nprint(ir)"
       ]
     },
     {
@@ -94,7 +94,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Glue to Lowering\n----------------\nSo far, we are done with writing this IR transformation pass. What we need to do next is to glue\nthis pass to TVM's lower pass. We can first call this function directly as a sanity check.\n\n\n"
+        "Glue to Lowering\n----------------\nSo far, we are done with writing this IR transformation pass. What we need to do next is to glue\nthis pass to TVM's lower pass.\n\nIn TVM, there is a property called ``BuildConfig``. You can use this property to customize your\nown lowering options. In this case, we inject the pass written above into the TVM standard lowering\npass by feeding **a list of tuple** as argument to ``add_lower_pass``. \"Tuple\" indicates different\nphases of lower [...]
       ]
     },
     {
@@ -105,25 +105,7 @@
       },
       "outputs": [],
       "source": [
-        "print(vectorize(ir))"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "In TVM, there is a property called ``BuildConfig``. You can use this property to customize your\nown lowering options. In this case, we inject the pass written above into the TVM standard lowering\npass by feeding **a list of tuple** as argument to ``add_lower_pass``. \"Tuple\" indicates different\nphases of lowering. In TVM, there are four phases of lowering and user-customized ones will be\ncalled after each phase is done.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>Her [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "with tvm.target.build_config(add_lower_pass=[(1, vectorize)]) as cfg:\n    print(tvm.lower(sch, [a, b, c], simple_mode=True))"
+        "with tvm.target.build_config(add_lower_pass=[(1, vectorize)]) as cfg:\n    print(tvm.lower(sch, [a, b, c]))"
       ]
     },
     {
diff --git a/docs/_sources/api/python/topi.rst.txt b/docs/_sources/api/python/topi.rst.txt
index e6a2c38..cef2999 100644
--- a/docs/_sources/api/python/topi.rst.txt
+++ b/docs/_sources/api/python/topi.rst.txt
@@ -99,6 +99,7 @@ List of operators
    topi.logical_and
    topi.logical_or
    topi.logical_not
+   topi.logical_xor
    topi.arange
    topi.stack
    topi.repeat
@@ -193,6 +194,7 @@ topi
 .. autofunction:: topi.logical_and
 .. autofunction:: topi.logical_or
 .. autofunction:: topi.logical_not
+.. autofunction:: topi.logical_xor
 
 topi.nn
 ~~~~~~~
diff --git a/docs/_sources/langref/relay_op.rst.txt b/docs/_sources/langref/relay_op.rst.txt
index f1d7d44..798d440 100644
--- a/docs/_sources/langref/relay_op.rst.txt
+++ b/docs/_sources/langref/relay_op.rst.txt
@@ -150,6 +150,7 @@ This level enables additional math and transform operators.
    tvm.relay.logical_and
    tvm.relay.logical_or
    tvm.relay.logical_not
+   tvm.relay.logical_xor
    tvm.relay.maximum
    tvm.relay.minimum
    tvm.relay.power
diff --git a/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
index 400f907..18c87dc 100644
--- a/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,11 +5,11 @@
 
 Computation times
 =================
-**00:52.428** total execution time for **tutorials_autotvm** files:
-
-- **00:27.421**: :ref:`sphx_glr_tutorials_autotvm_tune_simple_template.py` (``tune_simple_template.py``)
-- **00:24.417**: :ref:`sphx_glr_tutorials_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
-- **00:00.174**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
-- **00:00.139**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
-- **00:00.139**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
-- **00:00.139**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+**00:48.352** total execution time for **tutorials_autotvm** files:
+
+- **00:26.490**: :ref:`sphx_glr_tutorials_autotvm_tune_simple_template.py` (``tune_simple_template.py``)
+- **00:21.203**: :ref:`sphx_glr_tutorials_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
+- **00:00.181**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
+- **00:00.159**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+- **00:00.159**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
+- **00:00.159**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
diff --git a/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
index f781b24..683a73f 100644
--- a/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
@@ -234,26 +234,26 @@ for this template
        7 unroll_explicit: OtherOption([0, 1]) len=2
     )
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 2   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 3   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 4   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 5   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 6   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 7   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 8   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 9   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 10  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 11  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 12  GFLOPS: 58.18/58.18     result: MeasureResult(costs=(0.0039788533076923075,), error_no=0, all_cost=2.067173719406128, timestamp=1586993556.6456518)     [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8533140
-    No: 13  GFLOPS: 0.00/58.18      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 14  GFLOPS: 0.00/58.18      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 15  GFLOPS: 0.00/58.18      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 16  GFLOPS: 0.00/58.18      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 17  GFLOPS: 0.00/58.18      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 18  GFLOPS: 0.00/58.18      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (1) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (0) /workspace/build/libtvm.so(+0xd0976b) [0x7f90b157576b]\n  File "tvm/_ffi/_cython/./packed_func.pxi", line 55, in tvm._ffi._cy3.core.tvm_callback\n  File "/workspace/docs/../python/tvm/autotvm/measure/measure_methods.py", line 621, in verify_pass\n    raise InstantiationError("Skipped b [...]
-    No: 19  GFLOPS: 747.82/747.82   result: MeasureResult(costs=(0.00030956918693693695,), error_no=0, all_cost=2.488834857940674, timestamp=1586993561.323387)     [('tile_f', [-1, 2, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9699801
-    No: 20  GFLOPS: 6.97/747.82     result: MeasureResult(costs=(0.033197459,), error_no=0, all_cost=2.1923890113830566, timestamp=1586993562.7962627)      [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7502165
+    No: 1   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 2   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 3   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 4   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 5   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 6   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 7   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 8   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 9   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 10  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 11  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 12  GFLOPS: 58.21/58.21     result: MeasureResult(costs=(0.003977298038461538,), error_no=0, all_cost=1.471078634262085, timestamp=1587363156.3709424)      [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8533140
+    No: 13  GFLOPS: 0.00/58.21      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 14  GFLOPS: 0.00/58.21      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 15  GFLOPS: 0.00/58.21      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 16  GFLOPS: 0.00/58.21      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 17  GFLOPS: 0.00/58.21      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 18  GFLOPS: 0.00/58.21      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (4) /workspace/build/libtvm.so(+0x49a8ff) [0x7f897a2cd8ff]\n  [bt] (3) /workspace/build/libtvm.so(tvm::transform::Pass::operator()(tvm::IRModule) const+0x70) [0x7f897a2d0bd0]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::Pa [...]
+    No: 19  GFLOPS: 771.09/771.09   result: MeasureResult(costs=(0.000300224651214128,), error_no=0, all_cost=2.0936989784240723, timestamp=1587363160.1702943)     [('tile_f', [-1, 2, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9699801
+    No: 20  GFLOPS: 6.98/771.09     result: MeasureResult(costs=(0.03315011025,), error_no=0, all_cost=1.8245627880096436, timestamp=1587363161.2054307)    [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7502165
 
 
 
@@ -307,7 +307,7 @@ and measure running time.
 
     Best config:
     [('tile_f', [-1, 2, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9699801
-    Time cost of this operator: 0.000355
+    Time cost of this operator: 0.000358
 
 
 
diff --git a/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt b/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
index 86f4def..15f2dae 100644
--- a/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
+++ b/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
@@ -361,16 +361,16 @@ used to get the best config later.
  .. code-block:: none
 
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 6.97/6.97       result: MeasureResult(costs=(0.0385383866,), error_no=0, all_cost=0.8664705753326416, timestamp=1586993522.78784)       [('tile_y', [-1, 8]), ('tile_x', [-1, 8])],None,33
-    No: 2   GFLOPS: 2.32/6.97       result: MeasureResult(costs=(0.11549231280000001,), error_no=0, all_cost=2.101508140563965, timestamp=1586993525.0130222)       [('tile_y', [-1, 8]), ('tile_x', [-1, 2])],None,13
-    No: 3   GFLOPS: 7.35/7.35       result: MeasureResult(costs=(0.0365371688,), error_no=0, all_cost=1.1348650455474854, timestamp=1586993525.9961812)     [('tile_y', [-1, 16]), ('tile_x', [-1, 128])],None,74
-    No: 4   GFLOPS: 4.52/7.35       result: MeasureResult(costs=(0.05943379880000001,), error_no=0, all_cost=1.2699289321899414, timestamp=1586993527.329105)       [('tile_y', [-1, 16]), ('tile_x', [-1, 32])],None,54
-    No: 5   GFLOPS: 6.34/7.35       result: MeasureResult(costs=(0.0423189818,), error_no=0, all_cost=0.9372684955596924, timestamp=1586993528.3820336)     [('tile_y', [-1, 128]), ('tile_x', [-1, 8])],None,37
-    No: 6   GFLOPS: 6.80/7.35       result: MeasureResult(costs=(0.0394898064,), error_no=0, all_cost=0.9311320781707764, timestamp=1586993529.3924904)     [('tile_y', [-1, 64]), ('tile_x', [-1, 8])],None,36
-    No: 7   GFLOPS: 27.03/27.03     result: MeasureResult(costs=(0.0099325676,), error_no=0, all_cost=0.5125160217285156, timestamp=1586993529.9309843)     [('tile_y', [-1, 1]), ('tile_x', [-1, 128])],None,70
-    No: 8   GFLOPS: 20.77/27.03     result: MeasureResult(costs=(0.012926028400000001,), error_no=0, all_cost=0.4918792247772217, timestamp=1586993530.5227294)     [('tile_y', [-1, 4]), ('tile_x', [-1, 512])],None,92
-    No: 9   GFLOPS: 0.78/27.03      result: MeasureResult(costs=(0.344844209,), error_no=0, all_cost=5.782715082168579, timestamp=1586993536.4192333)       [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
-    No: 10  GFLOPS: 0.00/27.03      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (3) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]\n  [bt] (2) /workspace/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::runtime::RPCModuleNode::WrapRemote(void*)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#1}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime:: [...]
+    No: 1   GFLOPS: 7.06/7.06       result: MeasureResult(costs=(0.0380279692,), error_no=0, all_cost=0.8919658660888672, timestamp=1587363126.3145273)     [('tile_y', [-1, 8]), ('tile_x', [-1, 8])],None,33
+    No: 2   GFLOPS: 2.30/7.06       result: MeasureResult(costs=(0.1165274164,), error_no=0, all_cost=2.136989116668701, timestamp=1587363128.4989433)      [('tile_y', [-1, 8]), ('tile_x', [-1, 2])],None,13
+    No: 3   GFLOPS: 7.41/7.41       result: MeasureResult(costs=(0.036214165799999996,), error_no=0, all_cost=1.2049956321716309, timestamp=1587363129.400848)      [('tile_y', [-1, 16]), ('tile_x', [-1, 128])],None,74
+    No: 4   GFLOPS: 4.44/7.41       result: MeasureResult(costs=(0.0604868886,), error_no=0, all_cost=1.3611621856689453, timestamp=1587363130.683009)      [('tile_y', [-1, 16]), ('tile_x', [-1, 32])],None,54
+    No: 5   GFLOPS: 6.06/7.41       result: MeasureResult(costs=(0.0442902372,), error_no=0, all_cost=1.147874116897583, timestamp=1587363131.7490911)      [('tile_y', [-1, 128]), ('tile_x', [-1, 8])],None,37
+    No: 6   GFLOPS: 6.40/7.41       result: MeasureResult(costs=(0.041963698199999996,), error_no=0, all_cost=1.095632791519165, timestamp=1587363132.7843227)      [('tile_y', [-1, 64]), ('tile_x', [-1, 8])],None,36
+    No: 7   GFLOPS: 25.96/25.96     result: MeasureResult(costs=(0.010339799600000001,), error_no=0, all_cost=0.6244151592254639, timestamp=1587363133.274794)      [('tile_y', [-1, 1]), ('tile_x', [-1, 128])],None,70
+    No: 8   GFLOPS: 20.39/25.96     result: MeasureResult(costs=(0.0131632686,), error_no=0, all_cost=0.4899017810821533, timestamp=1587363133.7990336)     [('tile_y', [-1, 4]), ('tile_x', [-1, 512])],None,92
+    No: 9   GFLOPS: 0.83/25.96      result: MeasureResult(costs=(0.3225978646,), error_no=0, all_cost=5.420862436294556, timestamp=1587363139.2854187)      [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 10  GFLOPS: 0.00/25.96      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (3) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]\n  [bt] (2) /workspace/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::runtime::RPCModuleNode::WrapRemote(void*)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#1}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime:: [...]
 
 
 
diff --git a/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt b/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
index ede8747..3b18a9a 100644
--- a/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
+++ b/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
@@ -35,8 +35,6 @@ Before reading this tutorial, we assume readers have already known these topics
 
 .. code-block:: default
 
-
-    from __future__ import absolute_import, print_function
     import tvm
     from tvm import te
     import numpy as np
@@ -61,7 +59,7 @@ our customized lowering pass to manipulate the IR directly instead of using sche
     c = te.compute((n, ), lambda i: a[i] + b[i], name='c')
 
     sch = te.create_schedule(c.op)
-    ir  = tvm.lower(sch, [a, b, c], simple_mode=True)
+    ir  = tvm.lower(sch, [a, b, c])
     print(ir)
 
 
@@ -74,13 +72,16 @@ our customized lowering pass to manipulate the IR directly instead of using sche
 
  .. code-block:: none
 
-    for (i, 0, 128) {
-      c[i] = (a[i] + b[i])
+    PrimFunc([a, b, c]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, 128) {
+        c[i] = (a[i] + b[i])
+      }
     }
 
 
 
 
+
 Writing a Pass
 --------------
 Essentially, an "IR transformation pass" is a function which maps a statement to a new statement.
@@ -174,33 +175,7 @@ this value.
 Glue to Lowering
 ----------------
 So far, we are done with writing this IR transformation pass. What we need to do next is to glue
-this pass to TVM's lower pass. We can first call this function directly as a sanity check.
-
-
-
-.. code-block:: default
-
-
-    print(vectorize(ir))
-
-
-
-
-
-.. rst-class:: sphx-glr-script-out
-
- Out:
-
- .. code-block:: none
-
-    for (i.outer, 0, 16) {
-      vectorized (i.inner, 0, 8) {
-        c[((i.outer*8) + i.inner)] = (a[((i.outer*8) + i.inner)] + b[((i.outer*8) + i.inner)])
-      }
-    }
-
-
-
+this pass to TVM's lower pass.
 
 In TVM, there is a property called ``BuildConfig``. You can use this property to customize your
 own lowering options. In this case, we inject the pass written above into the TVM standard lowering
@@ -223,7 +198,7 @@ Thus, a good place to put this transformation pass is just after Phase 1.
 
 
     with tvm.target.build_config(add_lower_pass=[(1, vectorize)]) as cfg:
-        print(tvm.lower(sch, [a, b, c], simple_mode=True))
+        print(tvm.lower(sch, [a, b, c]))
 
 
 
@@ -235,13 +210,16 @@ Thus, a good place to put this transformation pass is just after Phase 1.
 
  .. code-block:: none
 
-    for (i.outer, 0, 16) {
-      c[ramp((i.outer*8), 1, 8)] = (a[ramp((i.outer*8), 1, 8)] + b[ramp((i.outer*8), 1, 8)])
+    PrimFunc([a, b, c]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i.outer, 0, 16) {
+        c[ramp((i.outer*8), 1, 8)] = (a[ramp((i.outer*8), 1, 8)] + b[ramp((i.outer*8), 1, 8)])
+      }
     }
 
 
 
 
+
 Quick View
 ----------
 This tutorial gives a quick view of writing a customized IR transformation pass:
diff --git a/docs/_sources/tutorials/dev/sg_execution_times.rst.txt b/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
index aed009f..0b8b0da 100644
--- a/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:00.472** total execution time for **tutorials_dev** files:
+**00:00.568** total execution time for **tutorials_dev** files:
 
-- **00:00.332**: :ref:`sphx_glr_tutorials_dev_relay_pass_infra.py` (``relay_pass_infra.py``)
-- **00:00.140**: :ref:`sphx_glr_tutorials_dev_low_level_custom_pass.py` (``low_level_custom_pass.py``)
+- **00:00.407**: :ref:`sphx_glr_tutorials_dev_relay_pass_infra.py` (``relay_pass_infra.py``)
+- **00:00.160**: :ref:`sphx_glr_tutorials_dev_low_level_custom_pass.py` (``low_level_custom_pass.py``)
diff --git a/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt b/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
index 2b92b6e..a7edc76 100644
--- a/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
@@ -415,7 +415,7 @@ Execute on TVM
 
     TVM prediction top-1: tiger cat
     Evaluate inference time cost...
-    Mean inference time (std dev): 2.18 ms (0.29 ms)
+    Mean inference time (std dev): 11.31 ms (2.34 ms)
 
 
 
diff --git a/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt b/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
index 4229bca..078b8b6 100644
--- a/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
@@ -190,15 +190,6 @@ training. Other models require a full post training calibration.
 
 
 
-.. rst-class:: sphx-glr-script-out
-
- Out:
-
- .. code-block:: none
-
-    Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     29%|##8       | 3.92M/13.6M [00:00<00:00, 41.1MB/s]
     47%|####6     | 6.34M/13.6M [00:00<00:00, 34.3MB/s]
     62%|######1   | 8.40M/13.6M [00:00<00:00, 27.6MB/s]
     88%|########7 | 11.9M/13.6M [00:00<00:00, 29.8MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 29.9MB/s]
-
 
 
 Quantize, trace and run the PyTorch Mobilenet v2 model
@@ -361,7 +352,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
  .. code-block:: none
 
-    Elapsed average ms: 6.2028009299999995
+    Elapsed average ms: 10.429512450000002
 
 
 
diff --git a/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt b/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
index df25838..242a3ce 100644
--- a/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
@@ -169,6 +169,45 @@ Create TVM runtime and do inference
 
  .. code-block:: none
 
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 3, 512, 512), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (256, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (64, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (512, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (512, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (1024, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (1024, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (256, 1024, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (84, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (512, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (2048, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (2048, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (512, 2048, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (126, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 4, 4), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (16, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (24, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 512, 512), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
@@ -224,45 +263,6 @@ Create TVM runtime and do inference
     Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 3, 512, 512), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (256, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (64, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (512, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (512, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (1024, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (1024, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (256, 1024, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (84, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (512, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (2048, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (2048, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (512, 2048, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (126, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 4, 4), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (16, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (24, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
 
 
 
diff --git a/docs/_sources/tutorials/frontend/from_onnx.rst.txt b/docs/_sources/tutorials/frontend/from_onnx.rst.txt
index 257da52..0c24186 100644
--- a/docs/_sources/tutorials/frontend/from_onnx.rst.txt
+++ b/docs/_sources/tutorials/frontend/from_onnx.rst.txt
@@ -151,7 +151,7 @@ Execute on TVM
 
  .. code-block:: none
 
-
    ...47%, 0.01 MB, 42 KB/s, 0 seconds passed
    ...94%, 0.02 MB, 85 KB/s, 0 seconds passed
    ...100%, 0.02 MB, 128 KB/s, 0 seconds passed
+
    ...47%, 0.01 MB, 25 KB/s, 0 seconds passed
    ...94%, 0.02 MB, 51 KB/s, 0 seconds passed
    ...100%, 0.02 MB, 77 KB/s, 0 seconds passed
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 224, 224), 'float32'), ('TENSOR', (9, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 224, 224), 'float32'), ('TENSOR', (32, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1, 224, 224), 'float32'), ('TENSOR', (64, 1, 5, 5), 'float32'), (1, 1), (2, 2, 2, 2), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
diff --git a/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
index 59bf8d3..3815cf1 100644
--- a/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,21 +5,21 @@
 
 Computation times
 =================
-**03:05.770** total execution time for **tutorials_frontend** files:
+**03:22.641** total execution time for **tutorials_frontend** files:
 
-- **00:26.319**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized.py` (``deploy_prequantized.py``)
-- **00:22.821**: :ref:`sphx_glr_tutorials_frontend_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
-- **00:19.364**: :ref:`sphx_glr_tutorials_frontend_from_tflite.py` (``from_tflite.py``)
-- **00:17.500**: :ref:`sphx_glr_tutorials_frontend_from_tensorflow.py` (``from_tensorflow.py``)
-- **00:16.859**: :ref:`sphx_glr_tutorials_frontend_deploy_quantized.py` (``deploy_quantized.py``)
-- **00:15.361**: :ref:`sphx_glr_tutorials_frontend_from_darknet.py` (``from_darknet.py``)
-- **00:13.839**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_android.py` (``deploy_model_on_android.py``)
-- **00:13.039**: :ref:`sphx_glr_tutorials_frontend_from_keras.py` (``from_keras.py``)
-- **00:10.709**: :ref:`sphx_glr_tutorials_frontend_from_caffe2.py` (``from_caffe2.py``)
-- **00:06.308**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
-- **00:06.109**: :ref:`sphx_glr_tutorials_frontend_from_pytorch.py` (``from_pytorch.py``)
-- **00:05.230**: :ref:`sphx_glr_tutorials_frontend_from_coreml.py` (``from_coreml.py``)
-- **00:04.473**: :ref:`sphx_glr_tutorials_frontend_from_mxnet.py` (``from_mxnet.py``)
-- **00:04.422**: :ref:`sphx_glr_tutorials_frontend_build_gcn.py` (``build_gcn.py``)
-- **00:02.027**: :ref:`sphx_glr_tutorials_frontend_using_external_lib.py` (``using_external_lib.py``)
-- **00:01.389**: :ref:`sphx_glr_tutorials_frontend_from_onnx.py` (``from_onnx.py``)
+- **00:31.258**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized.py` (``deploy_prequantized.py``)
+- **00:25.665**: :ref:`sphx_glr_tutorials_frontend_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
+- **00:19.719**: :ref:`sphx_glr_tutorials_frontend_from_tflite.py` (``from_tflite.py``)
+- **00:17.782**: :ref:`sphx_glr_tutorials_frontend_from_tensorflow.py` (``from_tensorflow.py``)
+- **00:17.119**: :ref:`sphx_glr_tutorials_frontend_deploy_quantized.py` (``deploy_quantized.py``)
+- **00:16.379**: :ref:`sphx_glr_tutorials_frontend_from_darknet.py` (``from_darknet.py``)
+- **00:14.433**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_android.py` (``deploy_model_on_android.py``)
+- **00:13.210**: :ref:`sphx_glr_tutorials_frontend_from_keras.py` (``from_keras.py``)
+- **00:12.283**: :ref:`sphx_glr_tutorials_frontend_from_caffe2.py` (``from_caffe2.py``)
+- **00:09.090**: :ref:`sphx_glr_tutorials_frontend_from_pytorch.py` (``from_pytorch.py``)
+- **00:06.734**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
+- **00:06.368**: :ref:`sphx_glr_tutorials_frontend_from_coreml.py` (``from_coreml.py``)
+- **00:04.564**: :ref:`sphx_glr_tutorials_frontend_build_gcn.py` (``build_gcn.py``)
+- **00:04.419**: :ref:`sphx_glr_tutorials_frontend_from_mxnet.py` (``from_mxnet.py``)
+- **00:02.054**: :ref:`sphx_glr_tutorials_frontend_using_external_lib.py` (``using_external_lib.py``)
+- **00:01.564**: :ref:`sphx_glr_tutorials_frontend_from_onnx.py` (``from_onnx.py``)
diff --git a/docs/_sources/tutorials/language/reduction.rst.txt b/docs/_sources/tutorials/language/reduction.rst.txt
index 45df921..a06043f 100644
--- a/docs/_sources/tutorials/language/reduction.rst.txt
+++ b/docs/_sources/tutorials/language/reduction.rst.txt
@@ -92,16 +92,19 @@ Before doing anything, let us print out the IR code of default schedule.
 
  .. code-block:: none
 
-    for (i, 0, n) {
-      B[(i*stride)] = 0f
-      for (k, 0, m) {
-        B[(i*stride)] = (B[(i*stride)] + A[((i*stride) + (k*stride))])
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, n) {
+        B[(i*stride)] = 0f
+        for (k, 0, m) {
+          B[(i*stride)] = (B[(i*stride)] + A[((i*stride) + (k*stride))])
+        }
       }
     }
 
 
 
 
+
 You can find that the IR code is quite like the C code.
 The reduction axis is similar to a normal axis, it can be splitted.
 
@@ -126,16 +129,18 @@ axis by different factors. The result is a nested reduction.
 
  .. code-block:: none
 
-    for (i.outer, 0, floordiv((n + 31), 32)) {
-      for (i.inner, 0, 32) {
-        if (likely((((i.outer*32) + i.inner) < n))) {
-          B[(((i.outer*32) + i.inner)*stride)] = 0f
-        }
-        for (k.outer, 0, floordiv((m + 15), 16)) {
-          for (k.inner, 0, 16) {
-            if (likely((((i.outer*32) + i.inner) < n))) {
-              if (likely((((k.outer*16) + k.inner) < m))) {
-                B[(((i.outer*32) + i.inner)*stride)] = (B[(((i.outer*32) + i.inner)*stride)] + A[((((i.outer*32) + i.inner)*stride) + (((k.outer*16) + k.inner)*stride))])
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i.outer, 0, floordiv((n + 31), 32)) {
+        for (i.inner, 0, 32) {
+          if (likely((((i.outer*32) + i.inner) < n))) {
+            B[(((i.outer*32) + i.inner)*stride)] = 0f
+          }
+          for (k.outer, 0, floordiv((m + 15), 16)) {
+            for (k.inner, 0, 16) {
+              if (likely((((i.outer*32) + i.inner) < n))) {
+                if (likely((((k.outer*16) + k.inner) < m))) {
+                  B[(((i.outer*32) + i.inner)*stride)] = (B[(((i.outer*32) + i.inner)*stride)] + A[((((i.outer*32) + i.inner)*stride) + (((k.outer*16) + k.inner)*stride))])
+                }
               }
             }
           }
@@ -146,6 +151,7 @@ axis by different factors. The result is a nested reduction.
 
 
 
+
 If we are building a GPU kernel, we can bind the rows of B to GPU threads.
 
 
@@ -165,16 +171,18 @@ If we are building a GPU kernel, we can bind the rows of B to GPU threads.
 
  .. code-block:: none
 
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = floordiv((n + 31), 32)
-    // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
-    if (likely((((blockIdx.x*32) + threadIdx.x) < n))) {
-      B[(((blockIdx.x*32) + threadIdx.x)*stride)] = 0f
-    }
-    for (k.outer, 0, floordiv((m + 15), 16)) {
-      for (k.inner, 0, 16) {
-        if (likely((((blockIdx.x*32) + threadIdx.x) < n))) {
-          if (likely((((k.outer*16) + k.inner) < m))) {
-            B[(((blockIdx.x*32) + threadIdx.x)*stride)] = (B[(((blockIdx.x*32) + threadIdx.x)*stride)] + A[((((blockIdx.x*32) + threadIdx.x)*stride) + (((k.outer*16) + k.inner)*stride))])
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = floordiv((n + 31), 32)
+      // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
+      if (likely((((blockIdx.x*32) + threadIdx.x) < n))) {
+        B[(((blockIdx.x*32) + threadIdx.x)*stride)] = 0f
+      }
+      for (k.outer, 0, floordiv((m + 15), 16)) {
+        for (k.inner, 0, 16) {
+          if (likely((((blockIdx.x*32) + threadIdx.x) < n))) {
+            if (likely((((k.outer*16) + k.inner) < m))) {
+              B[(((blockIdx.x*32) + threadIdx.x)*stride)] = (B[(((blockIdx.x*32) + threadIdx.x)*stride)] + A[((((blockIdx.x*32) + threadIdx.x)*stride) + (((k.outer*16) + k.inner)*stride))])
+            }
           }
         }
       }
@@ -183,6 +191,7 @@ If we are building a GPU kernel, we can bind the rows of B to GPU threads.
 
 
 
+
 Reduction Factoring and Parallelization
 ---------------------------------------
 One problem of building a reduction is that we cannot simply
@@ -213,28 +222,31 @@ result B.rf. The factored dimension becomes the first dimension of B.rf.
 
  .. code-block:: none
 
-    // attr [B.rf] storage_scope = "global"
-    allocate B.rf[float32 * (n*16)]
-    for (k.inner, 0, 16) {
-      for (i, 0, n) {
-        B.rf[((k.inner*n) + i)] = 0f
-        for (k.outer, 0, floordiv((m + 15), 16)) {
-          if (likely((((k.outer*16) + k.inner) < m))) {
-            B.rf[((k.inner*n) + i)] = (B.rf[((k.inner*n) + i)] + A[((i*stride) + (((k.outer*16) + k.inner)*stride))])
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [B.rf] storage_scope = "global"
+      allocate B.rf[float32 * (n*16)]
+      for (k.inner, 0, 16) {
+        for (i, 0, n) {
+          B.rf[((k.inner*n) + i)] = 0f
+          for (k.outer, 0, floordiv((m + 15), 16)) {
+            if (likely((((k.outer*16) + k.inner) < m))) {
+              B.rf[((k.inner*n) + i)] = (B.rf[((k.inner*n) + i)] + A[((i*stride) + (((k.outer*16) + k.inner)*stride))])
+            }
           }
         }
       }
-    }
-    for (ax0, 0, n) {
-      B[(ax0*stride)] = 0f
-      for (k.inner.v, 0, 16) {
-        B[(ax0*stride)] = (B[(ax0*stride)] + B.rf[((k.inner.v*n) + ax0)])
+      for (ax0, 0, n) {
+        B[(ax0*stride)] = 0f
+        for (k.inner.v, 0, 16) {
+          B[(ax0*stride)] = (B[(ax0*stride)] + B.rf[((k.inner.v*n) + ax0)])
+        }
       }
     }
 
 
 
 
+
 The scheduled operator of B also get rewritten to be sum over
 the first axis of reduced result of B.f
 
@@ -382,12 +394,14 @@ Here is an example for 2D convolution with filter size = [3, 3] and strides = [1
 
  .. code-block:: none
 
-    for (i, 0, (n - 2)) {
-      for (j, 0, (n - 2)) {
-        Output[((i*(n - 2)) + j)] = 0f
-        for (di, 0, 3) {
-          for (dj, 0, 3) {
-            Output[((i*(n - 2)) + j)] = (Output[((i*(n - 2)) + j)] + (Input[(((i + di)*stride) + ((j + dj)*stride))]*Filter[((di*3) + dj)]))
+    PrimFunc([Input, Filter, Output]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, (n - 2)) {
+        for (j, 0, (n - 2)) {
+          Output[((i*(n - 2)) + j)] = 0f
+          for (di, 0, 3) {
+            for (dj, 0, 3) {
+              Output[((i*(n - 2)) + j)] = (Output[((i*(n - 2)) + j)] + (Input[(((i + di)*stride) + ((j + dj)*stride))]*Filter[((di*3) + dj)]))
+            }
           }
         }
       }
@@ -396,6 +410,7 @@ Here is an example for 2D convolution with filter size = [3, 3] and strides = [1
 
 
 
+
 .. _general-reduction:
 
 Define General Commutative Reduction Operation
diff --git a/docs/_sources/tutorials/language/scan.rst.txt b/docs/_sources/tutorials/language/scan.rst.txt
index f39e872..d174f71 100644
--- a/docs/_sources/tutorials/language/scan.rst.txt
+++ b/docs/_sources/tutorials/language/scan.rst.txt
@@ -98,22 +98,25 @@ To split on the time iteration, user can schedule on scan_op.scan_axis instead.
 
  .. code-block:: none
 
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = floordiv((n + 255), 256)
-    // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 256
-    if (likely((((blockIdx.x*256) + threadIdx.x) < n))) {
-      scan[(((blockIdx.x*256) + threadIdx.x)*stride)] = X[(((blockIdx.x*256) + threadIdx.x)*stride)]
-    }
-    for (scan.idx, 0, (m - 1)) {
+    PrimFunc([X, scan]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
       // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = floordiv((n + 255), 256)
       // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 256
       if (likely((((blockIdx.x*256) + threadIdx.x) < n))) {
-        scan[(((scan.idx + 1)*stride) + (((blockIdx.x*256) + threadIdx.x)*stride))] = (scan[((scan.idx*stride) + (((blockIdx.x*256) + threadIdx.x)*stride))] + X[(((scan.idx + 1)*stride) + (((blockIdx.x*256) + threadIdx.x)*stride))])
+        scan[(((blockIdx.x*256) + threadIdx.x)*stride)] = X[(((blockIdx.x*256) + threadIdx.x)*stride)]
+      }
+      for (scan.idx, 0, (m - 1)) {
+        // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = floordiv((n + 255), 256)
+        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 256
+        if (likely((((blockIdx.x*256) + threadIdx.x) < n))) {
+          scan[(((scan.idx + 1)*stride) + (((blockIdx.x*256) + threadIdx.x)*stride))] = (scan[((scan.idx*stride) + (((blockIdx.x*256) + threadIdx.x)*stride))] + X[(((scan.idx + 1)*stride) + (((blockIdx.x*256) + threadIdx.x)*stride))])
+        }
       }
     }
 
 
 
 
+
 Build and Verify
 ----------------
 We can build the scan kernel like other TVM kernels, here we use
@@ -190,21 +193,23 @@ the body of scan to be compute_at locations outside the scan loop.
 
  .. code-block:: none
 
-    // attr [s1] storage_scope = "global"
-    allocate s1[float32 * 32]
-    for (i, 0, n) {
-      scan[(i*stride)] = X[(i*stride)]
-    }
-    for (scan.idx, 0, (m - 1)) {
-      for (i.outer, 0, floordiv((n + 31), 32)) {
-        for (i, 0, 32) {
-          if (likely((((i.outer*32) + i) < n))) {
-            s1[i] = (scan[((scan.idx*stride) + (((i.outer*32) + i)*stride))]*2f)
+    PrimFunc([X, scan]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [s1] storage_scope = "global"
+      allocate s1[float32 * 32]
+      for (i, 0, n) {
+        scan[(i*stride)] = X[(i*stride)]
+      }
+      for (scan.idx, 0, (m - 1)) {
+        for (i.outer, 0, floordiv((n + 31), 32)) {
+          for (i, 0, 32) {
+            if (likely((((i.outer*32) + i) < n))) {
+              s1[i] = (scan[((scan.idx*stride) + (((i.outer*32) + i)*stride))]*2f)
+            }
           }
-        }
-        for (i.inner, 0, 32) {
-          if (likely((((i.outer*32) + i.inner) < n))) {
-            scan[(((scan.idx + 1)*stride) + (((i.outer*32) + i.inner)*stride))] = (s1[i.inner] + X[(((scan.idx + 1)*stride) + (((i.outer*32) + i.inner)*stride))])
+          for (i.inner, 0, 32) {
+            if (likely((((i.outer*32) + i.inner) < n))) {
+              scan[(((scan.idx + 1)*stride) + (((i.outer*32) + i.inner)*stride))] = (s1[i.inner] + X[(((scan.idx + 1)*stride) + (((i.outer*32) + i.inner)*stride))])
+            }
           }
         }
       }
@@ -213,6 +218,7 @@ the body of scan to be compute_at locations outside the scan loop.
 
 
 
+
 Multiple States
 ---------------
 For complicated applications like RNN, we might need more than one
@@ -249,24 +255,27 @@ The following example demonstrates how we can build recurrence with two states.
 
  .. code-block:: none
 
-    for (i, 0, n) {
-      scan.v0[(i*stride)] = X[(i*stride)]
-    }
-    for (i, 0, l) {
-      scan.v1[(i*stride)] = 0f
-    }
-    for (scan.idx, 0, (m - 1)) {
+    PrimFunc([X, scan.v0, scan.v1]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
       for (i, 0, n) {
-        scan.v0[(((scan.idx + 1)*stride) + (i*stride))] = (scan.v0[((scan.idx*stride) + (i*stride))] + X[(((scan.idx + 1)*stride) + (i*stride))])
+        scan.v0[(i*stride)] = X[(i*stride)]
       }
       for (i, 0, l) {
-        scan.v1[(((scan.idx + 1)*stride) + (i*stride))] = (scan.v1[((scan.idx*stride) + (i*stride))] + scan.v0[(scan.idx*stride)])
+        scan.v1[(i*stride)] = 0f
+      }
+      for (scan.idx, 0, (m - 1)) {
+        for (i, 0, n) {
+          scan.v0[(((scan.idx + 1)*stride) + (i*stride))] = (scan.v0[((scan.idx*stride) + (i*stride))] + X[(((scan.idx + 1)*stride) + (i*stride))])
+        }
+        for (i, 0, l) {
+          scan.v1[(((scan.idx + 1)*stride) + (i*stride))] = (scan.v1[((scan.idx*stride) + (i*stride))] + scan.v0[(scan.idx*stride)])
+        }
       }
     }
 
 
 
 
+
 Summary
 -------
 This tutorial provides a walk through of scan primitive.
diff --git a/docs/_sources/tutorials/language/schedule_primitives.rst.txt b/docs/_sources/tutorials/language/schedule_primitives.rst.txt
index 5d25601..6a9e218 100644
--- a/docs/_sources/tutorials/language/schedule_primitives.rst.txt
+++ b/docs/_sources/tutorials/language/schedule_primitives.rst.txt
@@ -83,15 +83,18 @@ schedule computes tensor in a serial manner in a row-major order.
 
  .. code-block:: none
 
-    for (i, 0, m) {
-      for (j, 0, n) {
-        C[((i*stride) + (j*stride))] = (A[((i*stride) + (j*stride))]*B[((i*stride) + (j*stride))])
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, m) {
+        for (j, 0, n) {
+          C[((i*stride) + (j*stride))] = (A[((i*stride) + (j*stride))]*B[((i*stride) + (j*stride))])
+        }
       }
     }
 
 
 
 
+
 One schedule is composed by multiple stages, and one
 **Stage** represents schedule for one operation. We provide various
 methods to schedule every stage.
@@ -121,10 +124,12 @@ split
 
  .. code-block:: none
 
-    for (i.outer, 0, floordiv((m + 31), 32)) {
-      for (i.inner, 0, 32) {
-        if (likely((((i.outer*32) + i.inner) < m))) {
-          B[(((i.outer*32) + i.inner)*stride)] = (A[(((i.outer*32) + i.inner)*stride)]*2f)
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i.outer, 0, floordiv((m + 31), 32)) {
+        for (i.inner, 0, 32) {
+          if (likely((((i.outer*32) + i.inner) < m))) {
+            B[(((i.outer*32) + i.inner)*stride)] = (A[(((i.outer*32) + i.inner)*stride)]*2f)
+          }
         }
       }
     }
@@ -132,6 +137,7 @@ split
 
 
 
+
 You can also split a axis by :code:`nparts`, which splits the axis
 contrary with :code:`factor`.
 
@@ -155,10 +161,12 @@ contrary with :code:`factor`.
 
  .. code-block:: none
 
-    for (i.outer, 0, 32) {
-      for (i.inner, 0, floordiv((m + 31), 32)) {
-        if (likely(((i.inner + (i.outer*floordiv((m + 31), 32))) < m))) {
-          B[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride)] = A[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride)]
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i.outer, 0, 32) {
+        for (i.inner, 0, floordiv((m + 31), 32)) {
+          if (likely(((i.inner + (i.outer*floordiv((m + 31), 32))) < m))) {
+            B[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride)] = A[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride)]
+          }
         }
       }
     }
@@ -166,6 +174,7 @@ contrary with :code:`factor`.
 
 
 
+
 tile
 ----
 :code:`tile` help you execute the computation tile by tile over two
@@ -191,13 +200,15 @@ axises.
 
  .. code-block:: none
 
-    for (i.outer, 0, floordiv((m + 9), 10)) {
-      for (j.outer, 0, floordiv((n + 4), 5)) {
-        for (i.inner, 0, 10) {
-          for (j.inner, 0, 5) {
-            if (likely((((i.outer*10) + i.inner) < m))) {
-              if (likely((((j.outer*5) + j.inner) < n))) {
-                B[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))] = A[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))]
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i.outer, 0, floordiv((m + 9), 10)) {
+        for (j.outer, 0, floordiv((n + 4), 5)) {
+          for (i.inner, 0, 10) {
+            for (j.inner, 0, 5) {
+              if (likely((((i.outer*10) + i.inner) < m))) {
+                if (likely((((j.outer*5) + j.inner) < n))) {
+                  B[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))] = A[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))]
+                }
               }
             }
           }
@@ -208,6 +219,7 @@ axises.
 
 
 
+
 fuse
 ----
 :code:`fuse` can fuse two consecutive axises of one computation.
@@ -235,12 +247,14 @@ fuse
 
  .. code-block:: none
 
-    for (i.outer, 0, floordiv((m + 9), 10)) {
-      for (j.outer, 0, floordiv((n + 4), 5)) {
-        for (i.inner.j.inner.fused, 0, 50) {
-          if (likely((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5)) < m))) {
-            if (likely((((j.outer*5) + floormod(i.inner.j.inner.fused, 5)) < n))) {
-              B[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride))] = A[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride))]
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i.outer, 0, floordiv((m + 9), 10)) {
+        for (j.outer, 0, floordiv((n + 4), 5)) {
+          for (i.inner.j.inner.fused, 0, 50) {
+            if (likely((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5)) < m))) {
+              if (likely((((j.outer*5) + floormod(i.inner.j.inner.fused, 5)) < n))) {
+                B[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride))] = A[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride))]
+              }
             }
           }
         }
@@ -250,6 +264,7 @@ fuse
 
 
 
+
 reorder
 -------
 :code:`reorder` can reorder the axises in the specified order.
@@ -277,13 +292,15 @@ reorder
 
  .. code-block:: none
 
-    for (i.inner, 0, 10) {
-      for (j.outer, 0, floordiv((n + 4), 5)) {
-        for (i.outer, 0, floordiv((m + 9), 10)) {
-          for (j.inner, 0, 5) {
-            if (likely((((i.outer*10) + i.inner) < m))) {
-              if (likely((((j.outer*5) + j.inner) < n))) {
-                B[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))] = A[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))]
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i.inner, 0, 10) {
+        for (j.outer, 0, floordiv((n + 4), 5)) {
+          for (i.outer, 0, floordiv((m + 9), 10)) {
+            for (j.inner, 0, 5) {
+              if (likely((((i.outer*10) + i.inner) < m))) {
+                if (likely((((j.outer*5) + j.inner) < n))) {
+                  B[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))] = A[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride))]
+                }
               }
             }
           }
@@ -294,6 +311,7 @@ reorder
 
 
 
+
 bind
 ----
 :code:`bind` can bind a specified axis with a thread axis, often used
@@ -321,15 +339,18 @@ in gpu programming.
 
  .. code-block:: none
 
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = floordiv((n + 63), 64)
-    // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 64
-    if (likely((((blockIdx.x*64) + threadIdx.x) < n))) {
-      B[(((blockIdx.x*64) + threadIdx.x)*stride)] = (A[(((blockIdx.x*64) + threadIdx.x)*stride)]*2f)
+    PrimFunc([A, B]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = floordiv((n + 63), 64)
+      // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 64
+      if (likely((((blockIdx.x*64) + threadIdx.x) < n))) {
+        B[(((blockIdx.x*64) + threadIdx.x)*stride)] = (A[(((blockIdx.x*64) + threadIdx.x)*stride)]*2f)
+      }
     }
 
 
 
 
+
 compute_at
 ----------
 For a schedule that consists of multiple operators, TVM will compute
@@ -355,16 +376,19 @@ tensors at the root separately by default.
 
  .. code-block:: none
 
-    for (i, 0, m) {
-      B[(i*stride)] = (A[(i*stride)] + 1f)
-    }
-    for (i, 0, m) {
-      C[(i*stride)] = (B[(i*stride)]*2f)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, m) {
+        B[(i*stride)] = (A[(i*stride)] + 1f)
+      }
+      for (i, 0, m) {
+        C[(i*stride)] = (B[(i*stride)]*2f)
+      }
     }
 
 
 
 
+
 :code:`compute_at` can move computation of `B` into the first axis
 of computation of `C`.
 
@@ -389,14 +413,17 @@ of computation of `C`.
 
  .. code-block:: none
 
-    for (i, 0, m) {
-      B[(i*stride)] = (A[(i*stride)] + 1f)
-      C[(i*stride)] = (B[(i*stride)]*2f)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, m) {
+        B[(i*stride)] = (A[(i*stride)] + 1f)
+        C[(i*stride)] = (B[(i*stride)]*2f)
+      }
     }
 
 
 
 
+
 compute_inline
 --------------
 :code:`compute_inline` can mark one stage as inline, then the body of
@@ -424,13 +451,16 @@ tensor is required.
 
  .. code-block:: none
 
-    for (i, 0, m) {
-      C[(i*stride)] = ((A[(i*stride)] + 1f)*2f)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, m) {
+        C[(i*stride)] = ((A[(i*stride)] + 1f)*2f)
+      }
     }
 
 
 
 
+
 compute_root
 ------------
 :code:`compute_root` can move computation of one stage to the root.
@@ -457,16 +487,19 @@ compute_root
 
  .. code-block:: none
 
-    for (i, 0, m) {
-      B[(i*stride)] = (A[(i*stride)] + 1f)
-    }
-    for (i, 0, m) {
-      C[(i*stride)] = (B[(i*stride)]*2f)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, m) {
+        B[(i*stride)] = (A[(i*stride)] + 1f)
+      }
+      for (i, 0, m) {
+        C[(i*stride)] = (B[(i*stride)]*2f)
+      }
     }
 
 
 
 
+
 Summary
 -------
 This tutorial provides an introduction to schedule primitives in
diff --git a/docs/_sources/tutorials/language/sg_execution_times.rst.txt b/docs/_sources/tutorials/language/sg_execution_times.rst.txt
index 716aa95..f321260 100644
--- a/docs/_sources/tutorials/language/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/language/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
 
 Computation times
 =================
-**00:03.704** total execution time for **tutorials_language** files:
+**00:03.515** total execution time for **tutorials_language** files:
 
-- **00:01.313**: :ref:`sphx_glr_tutorials_language_intrin_math.py` (``intrin_math.py``)
-- **00:00.666**: :ref:`sphx_glr_tutorials_language_tensorize.py` (``tensorize.py``)
-- **00:00.515**: :ref:`sphx_glr_tutorials_language_reduction.py` (``reduction.py``)
-- **00:00.462**: :ref:`sphx_glr_tutorials_language_scan.py` (``scan.py``)
-- **00:00.276**: :ref:`sphx_glr_tutorials_language_extern_op.py` (``extern_op.py``)
-- **00:00.174**: :ref:`sphx_glr_tutorials_language_tedd.py` (``tedd.py``)
-- **00:00.157**: :ref:`sphx_glr_tutorials_language_schedule_primitives.py` (``schedule_primitives.py``)
-- **00:00.141**: :ref:`sphx_glr_tutorials_language_tuple_inputs.py` (``tuple_inputs.py``)
+- **00:01.204**: :ref:`sphx_glr_tutorials_language_intrin_math.py` (``intrin_math.py``)
+- **00:00.628**: :ref:`sphx_glr_tutorials_language_tensorize.py` (``tensorize.py``)
+- **00:00.491**: :ref:`sphx_glr_tutorials_language_reduction.py` (``reduction.py``)
+- **00:00.463**: :ref:`sphx_glr_tutorials_language_scan.py` (``scan.py``)
+- **00:00.245**: :ref:`sphx_glr_tutorials_language_extern_op.py` (``extern_op.py``)
+- **00:00.172**: :ref:`sphx_glr_tutorials_language_tedd.py` (``tedd.py``)
+- **00:00.171**: :ref:`sphx_glr_tutorials_language_schedule_primitives.py` (``schedule_primitives.py``)
+- **00:00.143**: :ref:`sphx_glr_tutorials_language_tuple_inputs.py` (``tuple_inputs.py``)
diff --git a/docs/_sources/tutorials/language/tensorize.rst.txt b/docs/_sources/tutorials/language/tensorize.rst.txt
index 12b25bd..c31649d 100644
--- a/docs/_sources/tutorials/language/tensorize.rst.txt
+++ b/docs/_sources/tutorials/language/tensorize.rst.txt
@@ -69,11 +69,13 @@ The following lines describe the computation :code:`A * B^T` in TVM.
 
  .. code-block:: none
 
-    for (i, 0, 1024) {
-      for (j, 0, 512) {
-        C[((i*512) + j)] = 0f
-        for (k, 0, 64) {
-          C[((i*512) + j)] = (C[((i*512) + j)] + (A[((i*64) + k)]*B[((j*64) + k)]))
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, 1024) {
+        for (j, 0, 512) {
+          C[((i*512) + j)] = 0f
+          for (k, 0, 64) {
+            C[((i*512) + j)] = (C[((i*512) + j)] + (A[((i*64) + k)]*B[((j*64) + k)]))
+          }
         }
       }
     }
@@ -81,6 +83,7 @@ The following lines describe the computation :code:`A * B^T` in TVM.
 
 
 
+
 Schedule the Matmul
 -------------------
 Now, suppose we have an accelerator that supports
@@ -110,12 +113,14 @@ Thus we break down the matmul loops to make the innermost loops a (16x64) GEMV.
 
  .. code-block:: none
 
-    for (i, 0, 1024) {
-      for (j.outer, 0, 32) {
-        for (j.inner, 0, 16) {
-          C[(((i*512) + (j.outer*16)) + j.inner)] = 0f
-          for (k, 0, 64) {
-            C[(((i*512) + (j.outer*16)) + j.inner)] = (C[(((i*512) + (j.outer*16)) + j.inner)] + (A[((i*64) + k)]*B[(((j.outer*1024) + (j.inner*64)) + k)]))
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, 1024) {
+        for (j.outer, 0, 32) {
+          for (j.inner, 0, 16) {
+            C[(((i*512) + (j.outer*16)) + j.inner)] = 0f
+            for (k, 0, 64) {
+              C[(((i*512) + (j.outer*16)) + j.inner)] = (C[(((i*512) + (j.outer*16)) + j.inner)] + (A[((i*64) + k)]*B[(((j.outer*1024) + (j.inner*64)) + k)]))
+            }
           }
         }
       }
@@ -124,6 +129,7 @@ Thus we break down the matmul loops to make the innermost loops a (16x64) GEMV.
 
 
 
+
 As showed in the IR printed above,
 the inner loops :code:`j.inner` along with :code:`k` together form a computation of GEMV
 - within the inner most two loops, the index :code:`i` is fixed,
@@ -219,15 +225,18 @@ such placeholder can be put to let TVM automatically bind the inferred value for
 
  .. code-block:: none
 
-    for (i, 0, 1024) {
-      for (j.outer, 0, 32) {
-        gemv_update(tvm_access_ptr(type_annotation(), C, ((i*512) + (j.outer*16)), 16, 2), tvm_access_ptr(type_annotation(), A, (i*64), 64, 1), tvm_access_ptr(type_annotation(), B, (j.outer*1024), 1024, 1), 16, 64, 64)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, 1024) {
+        for (j.outer, 0, 32) {
+          gemv_update(tvm_access_ptr(type_annotation(), C, ((i*512) + (j.outer*16)), 16, 2), tvm_access_ptr(type_annotation(), A, (i*64), 64, 1), tvm_access_ptr(type_annotation(), B, (j.outer*1024), 1024, 1), 16, 64, 64)
+        }
       }
     }
 
 
 
 
+
 By tensorizing over :code:`yi`, the inner most two loops are
 now replaced by the intrinsic function we defined before.
 In order to build and run the module, let's define the external function :code:`gemv_update`,
@@ -281,16 +290,19 @@ The importing needs to happen before the tensorized GEMV being executed.
 
  .. code-block:: none
 
-    // attr [iter_var(i, )] pragma_import_llvm = "; ModuleID = '/tmp/tmpjeq03rs8/input0.cc'\nsource_filename = \"/tmp/tmpjeq03rs8/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca float*, align 8\n  %10 = alloca i32 [...]
-    for (i, 0, 1024) {
-      for (j.outer, 0, 32) {
-        gemv_update(tvm_access_ptr(type_annotation(), C, ((i*512) + (j.outer*16)), 16, 2), tvm_access_ptr(type_annotation(), A, (i*64), 64, 1), tvm_access_ptr(type_annotation(), B, (j.outer*1024), 1024, 1), 16, 64, 64)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [iter_var(i, )] pragma_import_llvm = "; ModuleID = '/tmp/tmpjupdmfr2/input0.cc'\nsource_filename = \"/tmp/tmpjupdmfr2/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca float*, align 8\n  %10 = alloca i [...]
+      for (i, 0, 1024) {
+        for (j.outer, 0, 32) {
+          gemv_update(tvm_access_ptr(type_annotation(), C, ((i*512) + (j.outer*16)), 16, 2), tvm_access_ptr(type_annotation(), A, (i*64), 64, 1), tvm_access_ptr(type_annotation(), B, (j.outer*1024), 1024, 1), 16, 64, 64)
+        }
       }
     }
 
 
 
 
+
 Finally we compare the tensorize version with that :code:`numpy.dot` produces,
 ensure our implementation is correct.
 
diff --git a/docs/_sources/tutorials/language/tuple_inputs.rst.txt b/docs/_sources/tutorials/language/tuple_inputs.rst.txt
index 66b7167..f26def8 100644
--- a/docs/_sources/tutorials/language/tuple_inputs.rst.txt
+++ b/docs/_sources/tutorials/language/tuple_inputs.rst.txt
@@ -62,16 +62,19 @@ together in the next schedule procedure.
 
  .. code-block:: none
 
-    for (i, 0, m) {
-      for (j, 0, n) {
-        B.v0[((i*stride) + (j*stride))] = (A0[((i*stride) + (j*stride))] + 2f)
-        B.v1[((i*stride) + (j*stride))] = (A1[((i*stride) + (j*stride))]*3f)
+    PrimFunc([A0, A1, B.v0, B.v1]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, m) {
+        for (j, 0, n) {
+          B.v0[((i*stride) + (j*stride))] = (A0[((i*stride) + (j*stride))] + 2f)
+          B.v1[((i*stride) + (j*stride))] = (A1[((i*stride) + (j*stride))]*3f)
+        }
       }
     }
 
 
 
 
+
 .. _reduction-with-tuple-inputs:
 
 Describe Reduction with Collaborative Inputs
@@ -122,18 +125,21 @@ with :py:func:`te.comm_reducer` as below:
 
  .. code-block:: none
 
-    for (i, 0, m) {
-      T.v0[(i*stride)] = -1
-      T.v1[(i*stride)] = -2147483648
-      for (k, 0, n) {
-        T.v0[(i*stride)] = tvm_if_then_else((val[((i*stride) + (k*stride))] <= T.v1[(i*stride)]), T.v0[(i*stride)], idx[((i*stride) + (k*stride))])
-        T.v1[(i*stride)] = tvm_if_then_else((val[((i*stride) + (k*stride))] <= T.v1[(i*stride)]), T.v1[(i*stride)], val[((i*stride) + (k*stride))])
+    PrimFunc([idx, val, T.v0, T.v1]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (i, 0, m) {
+        T.v0[(i*stride)] = -1
+        T.v1[(i*stride)] = -2147483648
+        for (k, 0, n) {
+          T.v0[(i*stride)] = tvm_if_then_else((val[((i*stride) + (k*stride))] <= T.v1[(i*stride)]), T.v0[(i*stride)], idx[((i*stride) + (k*stride))])
+          T.v1[(i*stride)] = tvm_if_then_else((val[((i*stride) + (k*stride))] <= T.v1[(i*stride)]), T.v1[(i*stride)], val[((i*stride) + (k*stride))])
+        }
       }
     }
 
 
 
 
+
 .. note::
 
   For ones who are not familiar with reduction, please refer to
@@ -171,23 +177,26 @@ in terms of operation.
 
  .. code-block:: none
 
-    // attr [B.v0] storage_scope = "global"
-    allocate B.v0[float32 * n]
-    // attr [B.v1] storage_scope = "global"
-    allocate B.v1[float32 * n]
-    for (i, 0, m) {
-      for (j, 0, n) {
-        B.v0[j] = (A0[((i*stride) + (j*stride))] + 2f)
-        B.v1[j] = (A0[((i*stride) + (j*stride))]*3f)
-      }
-      for (j, 0, n) {
-        C[((i*stride) + (j*stride))] = (A1[((i*stride) + (j*stride))] + B.v0[j])
+    PrimFunc([A0, A1, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [B.v0] storage_scope = "global"
+      allocate B.v0[float32 * n]
+      // attr [B.v1] storage_scope = "global"
+      allocate B.v1[float32 * n]
+      for (i, 0, m) {
+        for (j, 0, n) {
+          B.v0[j] = (A0[((i*stride) + (j*stride))] + 2f)
+          B.v1[j] = (A0[((i*stride) + (j*stride))]*3f)
+        }
+        for (j, 0, n) {
+          C[((i*stride) + (j*stride))] = (A1[((i*stride) + (j*stride))] + B.v0[j])
+        }
       }
     }
 
 
 
 
+
 Summary
 -------
 This tutorial introduces the usage of tuple inputs operation.
diff --git a/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt b/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
index bad8d43..0a30fdc 100644
--- a/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
@@ -293,7 +293,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 19.555312 ms
+    Convolution: 19.530313 ms
 
 
 
diff --git a/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt b/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
index 0d12c54..8602daf 100644
--- a/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
@@ -340,68 +340,70 @@ one time.
 
  .. code-block:: none
 
-    // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = 196
-    // attr [Conv.wmma.accumulator] storage_scope = "wmma.accumulator"
-    allocate Conv.wmma.accumulator[float32 * 2048]
-    // attr [Apad.shared] storage_scope = "shared"
-    allocate Apad.shared[float16 * 12288]
-    // attr [W.shared] storage_scope = "shared"
-    allocate W.shared[float16 * 12288]
-    // attr [Apad.shared.wmma.matrix_a] storage_scope = "wmma.matrix_a"
-    allocate Apad.shared.wmma.matrix_a[float16 * 512]
-    // attr [W.shared.wmma.matrix_b] storage_scope = "wmma.matrix_b"
-    allocate W.shared.wmma.matrix_b[float16 * 1024]
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 2
-    // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = 4
-    // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 4
-    // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 2
-    for (n.c.init, 0, 2) {
-      for (o.c.init, 0, 4) {
-        for (nn.c.init, 0, 16) {
-          for (oo.c.init, 0, 16) {
-            Conv.wmma.accumulator[((((n.c.init*1024) + (o.c.init*256)) + (nn.c.init*16)) + oo.c.init)] = 0f
+    PrimFunc([A, W, Conv]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = 196
+      // attr [Conv.wmma.accumulator] storage_scope = "wmma.accumulator"
+      allocate Conv.wmma.accumulator[float32 * 2048]
+      // attr [Apad.shared] storage_scope = "shared"
+      allocate Apad.shared[float16 * 12288]
+      // attr [W.shared] storage_scope = "shared"
+      allocate W.shared[float16 * 12288]
+      // attr [Apad.shared.wmma.matrix_a] storage_scope = "wmma.matrix_a"
+      allocate Apad.shared.wmma.matrix_a[float16 * 512]
+      // attr [W.shared.wmma.matrix_b] storage_scope = "wmma.matrix_b"
+      allocate W.shared.wmma.matrix_b[float16 * 1024]
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 2
+      // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = 4
+      // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 4
+      // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 2
+      for (n.c.init, 0, 2) {
+        for (o.c.init, 0, 4) {
+          for (nn.c.init, 0, 16) {
+            for (oo.c.init, 0, 16) {
+              Conv.wmma.accumulator[((((n.c.init*1024) + (o.c.init*256)) + (nn.c.init*16)) + oo.c.init)] = 0f
+            }
           }
         }
       }
-    }
-    for (ic.outer, 0, 8) {
-      for (kh, 0, 3) {
-        for (ax2, 0, 3) {
-          for (ax3, 0, 2) {
-            for (ax4.ax5.fused.outer, 0, 8) {
-              // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
-              Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = tvm_if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), A[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.outer*512)) + (ax3*2 [...]
+      for (ic.outer, 0, 8) {
+        for (kh, 0, 3) {
+          for (ax2, 0, 3) {
+            for (ax3, 0, 2) {
+              for (ax4.ax5.fused.outer, 0, 8) {
+                // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
+                Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = tvm_if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), A[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.outer*512)) + (ax3 [...]
+              }
             }
           }
-        }
-        for (ax1, 0, 3) {
-          for (ax2, 0, 2) {
-            // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
-            W.shared[ramp((((((ax1*4096) + (ax2*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = W[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)]
+          for (ax1, 0, 3) {
+            for (ax2, 0, 2) {
+              // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
+              W.shared[ramp((((((ax1*4096) + (ax2*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = W[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)]
+            }
           }
-        }
-        for (ic.inner, 0, 2) {
-          for (kw, 0, 3) {
-            for (ax0, 0, 2) {
-              for (ax4, 0, 16) {
-                for (ax5, 0, 16) {
-                  Apad.shared.wmma.matrix_a[(((ax0*256) + (ax4*16)) + ax5)] = Apad.shared[((((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)) + (ax4*16)) + ax5)]
+          for (ic.inner, 0, 2) {
+            for (kw, 0, 3) {
+              for (ax0, 0, 2) {
+                for (ax4, 0, 16) {
+                  for (ax5, 0, 16) {
+                    Apad.shared.wmma.matrix_a[(((ax0*256) + (ax4*16)) + ax5)] = Apad.shared[((((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)) + (ax4*16)) + ax5)]
+                  }
                 }
               }
-            }
-            for (ax3, 0, 4) {
-              for (ax4, 0, 16) {
-                for (ax5, 0, 16) {
-                  W.shared.wmma.matrix_b[(((ax3*256) + (ax4*16)) + ax5)] = W.shared[((((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3*256)) + (ax4*16)) + ax5)]
+              for (ax3, 0, 4) {
+                for (ax4, 0, 16) {
+                  for (ax5, 0, 16) {
+                    W.shared.wmma.matrix_b[(((ax3*256) + (ax4*16)) + ax5)] = W.shared[((((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3*256)) + (ax4*16)) + ax5)]
+                  }
                 }
               }
-            }
-            for (n.c, 0, 2) {
-              for (o.c, 0, 4) {
-                for (nn.c, 0, 16) {
-                  for (oo.c, 0, 16) {
-                    for (ii, 0, 16) {
-                      Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)] = (Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)] + (float32(Apad.shared.wmma.matrix_a[(((n.c*256) + (nn.c*16)) + ii)])*float32(W.shared.wmma.matrix_b[(((o.c*256) + (ii*16)) + oo.c)])))
+              for (n.c, 0, 2) {
+                for (o.c, 0, 4) {
+                  for (nn.c, 0, 16) {
+                    for (oo.c, 0, 16) {
+                      for (ii, 0, 16) {
+                        Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)] = (Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)] + (float32(Apad.shared.wmma.matrix_a[(((n.c*256) + (nn.c*16)) + ii)])*float32(W.shared.wmma.matrix_b[(((o.c*256) + (ii*16)) + oo.c)])))
+                      }
                     }
                   }
                 }
@@ -410,12 +412,12 @@ one time.
           }
         }
       }
-    }
-    for (n.inner, 0, 2) {
-      for (o.inner, 0, 4) {
-        for (nn, 0, 16) {
-          for (oo, 0, 16) {
-            Conv[(((((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)) + (nn*16)) + oo)] = Conv.wmma.accumulator[((((n.inner*1024) + (o.inner*256)) + (nn*16)) + oo)]
+      for (n.inner, 0, 2) {
+        for (o.inner, 0, 4) {
+          for (nn, 0, 16) {
+            for (oo, 0, 16) {
+              Conv[(((((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)) + (nn*16)) + oo)] = Conv.wmma.accumulator[((((n.inner*1024) + (o.inner*256)) + (nn*16)) + oo)]
+            }
           }
         }
       }
@@ -424,6 +426,7 @@ one time.
 
 
 
+
 Lowering Computation to Intrinsics
 ----------------------------------
 The last phase is to lower the computation loops down to TensorCore hardware intrinsics
@@ -449,68 +452,71 @@ by mapping the 2D convolution to tensor intrinsics
 
  .. code-block:: none
 
-    // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = 196
-    // attr [Conv.wmma.accumulator] storage_scope = "wmma.accumulator"
-    allocate Conv.wmma.accumulator[float32 * 2048]
-    // attr [Apad.shared] storage_scope = "shared"
-    allocate Apad.shared[float16 * 12288]
-    // attr [W.shared] storage_scope = "shared"
-    allocate W.shared[float16 * 12288]
-    // attr [Apad.shared.wmma.matrix_a] storage_scope = "wmma.matrix_a"
-    allocate Apad.shared.wmma.matrix_a[float16 * 512]
-    // attr [W.shared.wmma.matrix_b] storage_scope = "wmma.matrix_b"
-    allocate W.shared.wmma.matrix_b[float16 * 1024]
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 2
-    // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = 4
-    // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 4
-    // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 2
-    for (n.c.init, 0, 2) {
-      for (o.c.init, 0, 4) {
-        tvm_fill_fragment(Conv.wmma.accumulator, 16, 16, 16, ((n.c.init*4) + o.c.init), 0f)
+    PrimFunc([A, W, Conv]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = 196
+      // attr [Conv.wmma.accumulator] storage_scope = "wmma.accumulator"
+      allocate Conv.wmma.accumulator[float32 * 2048]
+      // attr [Apad.shared] storage_scope = "shared"
+      allocate Apad.shared[float16 * 12288]
+      // attr [W.shared] storage_scope = "shared"
+      allocate W.shared[float16 * 12288]
+      // attr [Apad.shared.wmma.matrix_a] storage_scope = "wmma.matrix_a"
+      allocate Apad.shared.wmma.matrix_a[float16 * 512]
+      // attr [W.shared.wmma.matrix_b] storage_scope = "wmma.matrix_b"
+      allocate W.shared.wmma.matrix_b[float16 * 1024]
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 2
+      // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = 4
+      // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 4
+      // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 2
+      for (n.c.init, 0, 2) {
+        for (o.c.init, 0, 4) {
+          tvm_fill_fragment(Conv.wmma.accumulator, 16, 16, 16, ((n.c.init*4) + o.c.init), 0f)
+        }
       }
-    }
-    for (ic.outer, 0, 8) {
-      for (kh, 0, 3) {
-        for (ax2, 0, 3) {
-          for (ax3, 0, 2) {
-            for (ax4.ax5.fused.outer, 0, 8) {
-              // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
-              Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = tvm_if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), A[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.outer*512)) + (ax3*2 [...]
+      for (ic.outer, 0, 8) {
+        for (kh, 0, 3) {
+          for (ax2, 0, 3) {
+            for (ax3, 0, 2) {
+              for (ax4.ax5.fused.outer, 0, 8) {
+                // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
+                Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = tvm_if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), A[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.outer*512)) + (ax3 [...]
+              }
             }
           }
-        }
-        for (ax1, 0, 3) {
-          for (ax2, 0, 2) {
-            // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
-            W.shared[ramp((((((ax1*4096) + (ax2*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = W[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)]
-          }
-        }
-        for (ic.inner, 0, 2) {
-          for (kw, 0, 3) {
-            for (ax0, 0, 2) {
-              tvm_load_matrix_sync(Apad.shared.wmma.matrix_a, 16, 16, 16, ax0, tvm_access_ptr(type_annotation(), Apad.shared, ((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)), 256, 1), 16, "row_major")
-            }
-            for (ax3, 0, 4) {
-              tvm_load_matrix_sync(W.shared.wmma.matrix_b, 16, 16, 16, ax3, tvm_access_ptr(type_annotation(), W.shared, ((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3*256)), 256, 1), 16, "row_major")
+          for (ax1, 0, 3) {
+            for (ax2, 0, 2) {
+              // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 32
+              W.shared[ramp((((((ax1*4096) + (ax2*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = W[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)]
             }
-            for (n.c, 0, 2) {
-              for (o.c, 0, 4) {
-                tvm_mma_sync(Conv.wmma.accumulator, ((n.c*4) + o.c), Apad.shared.wmma.matrix_a, n.c, W.shared.wmma.matrix_b, o.c, Conv.wmma.accumulator, ((n.c*4) + o.c))
+          }
+          for (ic.inner, 0, 2) {
+            for (kw, 0, 3) {
+              for (ax0, 0, 2) {
+                tvm_load_matrix_sync(Apad.shared.wmma.matrix_a, 16, 16, 16, ax0, tvm_access_ptr(type_annotation(), Apad.shared, ((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)), 256, 1), 16, "row_major")
+              }
+              for (ax3, 0, 4) {
+                tvm_load_matrix_sync(W.shared.wmma.matrix_b, 16, 16, 16, ax3, tvm_access_ptr(type_annotation(), W.shared, ((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3*256)), 256, 1), 16, "row_major")
+              }
+              for (n.c, 0, 2) {
+                for (o.c, 0, 4) {
+                  tvm_mma_sync(Conv.wmma.accumulator, ((n.c*4) + o.c), Apad.shared.wmma.matrix_a, n.c, W.shared.wmma.matrix_b, o.c, Conv.wmma.accumulator, ((n.c*4) + o.c))
+                }
               }
             }
           }
         }
       }
-    }
-    for (n.inner, 0, 2) {
-      for (o.inner, 0, 4) {
-        tvm_store_matrix_sync(Conv.wmma.accumulator, 16, 16, 16, ((n.inner*4) + o.inner), tvm_access_ptr(type_annotation(), Conv, (((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)), 256, 2), 16, "row_major")
+      for (n.inner, 0, 2) {
+        for (o.inner, 0, 4) {
+          tvm_store_matrix_sync(Conv.wmma.accumulator, 16, 16, 16, ((n.inner*4) + o.inner), tvm_access_ptr(type_annotation(), Conv, (((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)), 256, 2), 16, "row_major")
+        }
       }
     }
 
 
 
 
+
 Generate CUDA Kernel
 --------------------
 Finally we use TVM to generate and compile the CUDA kernel, and evaluate the latency of convolution.
@@ -543,7 +549,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 6.301623 ms
+    conv2d with tensor core: 6.288323 ms
 
 
 
diff --git a/docs/_sources/tutorials/optimize/opt_gemm.rst.txt b/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
index 25a430f..ec9f776 100644
--- a/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
@@ -118,8 +118,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.005827
-    Baseline: 5.754878
+    Numpy running time: 0.008158
+    Baseline: 5.951094
 
 
 
@@ -142,11 +142,13 @@ Here is the generated IR using our baseline schedule.
 
  .. code-block:: none
 
-    for (x, 0, 1024) {
-      for (y, 0, 1024) {
-        C[((x*1024) + y)] = 0f
-        for (k, 0, 1024) {
-          C[((x*1024) + y)] = (C[((x*1024) + y)] + (A[((x*1024) + k)]*B[((k*1024) + y)]))
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (x, 0, 1024) {
+        for (y, 0, 1024) {
+          C[((x*1024) + y)] = 0f
+          for (k, 0, 1024) {
+            C[((x*1024) + y)] = (C[((x*1024) + y)] + (A[((x*1024) + k)]*B[((k*1024) + y)]))
+          }
         }
       }
     }
@@ -154,6 +156,7 @@ Here is the generated IR using our baseline schedule.
 
 
 
+
 Blocking
 --------
 An important trick to enhance the cache hit rate is blocking --- data chunk will be computed
@@ -198,7 +201,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.107895
+    Opt1: 0.110619
 
 
 
@@ -220,18 +223,20 @@ Here is the generated IR after blocking.
 
  .. code-block:: none
 
-    for (x.outer, 0, 32) {
-      for (y.outer, 0, 32) {
-        for (x.inner.init, 0, 32) {
-          for (y.inner.init, 0, 32) {
-            C[((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)) + y.inner.init)] = 0f
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (x.outer, 0, 32) {
+        for (y.outer, 0, 32) {
+          for (x.inner.init, 0, 32) {
+            for (y.inner.init, 0, 32) {
+              C[((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)) + y.inner.init)] = 0f
+            }
           }
-        }
-        for (k.outer, 0, 256) {
-          for (k.inner, 0, 4) {
-            for (x.inner, 0, 32) {
-              for (y.inner, 0, 32) {
-                C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = (C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] + (A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)]*B[((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)) + y.inner)]))
+          for (k.outer, 0, 256) {
+            for (k.inner, 0, 4) {
+              for (x.inner, 0, 32) {
+                for (y.inner, 0, 32) {
+                  C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = (C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] + (A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)]*B[((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)) + y.inner)]))
+                }
               }
             }
           }
@@ -242,6 +247,7 @@ Here is the generated IR after blocking.
 
 
 
+
 Vectorization
 -------------
 Another important trick is vectorization. When the memory access pattern is uniform,
@@ -284,7 +290,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.111950
+    Opt2: 0.130606
 
 
 
@@ -306,15 +312,17 @@ Here is the generated IR after vectorization.
 
  .. code-block:: none
 
-    for (x.outer, 0, 32) {
-      for (y.outer, 0, 32) {
-        for (x.inner.init, 0, 32) {
-          C[ramp((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)), 1, 32)] = x32(0f)
-        }
-        for (k.outer, 0, 256) {
-          for (k.inner, 0, 4) {
-            for (x.inner, 0, 32) {
-              C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = (C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (x32(A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)])*B[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)]))
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (x.outer, 0, 32) {
+        for (y.outer, 0, 32) {
+          for (x.inner.init, 0, 32) {
+            C[ramp((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)), 1, 32)] = x32(0f)
+          }
+          for (k.outer, 0, 256) {
+            for (k.inner, 0, 4) {
+              for (x.inner, 0, 32) {
+                C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = (C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (x32(A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)])*B[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)]))
+              }
             }
           }
         }
@@ -324,6 +332,7 @@ Here is the generated IR after vectorization.
 
 
 
+
 Loop Permutation
 ----------------
 If we look at the above IR, we can see the inner loop row data is vectorized and
@@ -365,7 +374,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.060003
+    Opt3: 0.060277
 
 
 
@@ -387,15 +396,17 @@ Here is the generated IR after loop permutation.
 
  .. code-block:: none
 
-    for (x.outer, 0, 32) {
-      for (y.outer, 0, 32) {
-        for (x.inner.init, 0, 32) {
-          C[ramp((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)), 1, 32)] = x32(0f)
-        }
-        for (k.outer, 0, 256) {
-          for (x.inner, 0, 32) {
-            for (k.inner, 0, 4) {
-              C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = (C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (x32(A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)])*B[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)]))
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      for (x.outer, 0, 32) {
+        for (y.outer, 0, 32) {
+          for (x.inner.init, 0, 32) {
+            C[ramp((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)), 1, 32)] = x32(0f)
+          }
+          for (k.outer, 0, 256) {
+            for (x.inner, 0, 32) {
+              for (k.inner, 0, 4) {
+                C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = (C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (x32(A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)])*B[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)]))
+              }
             }
           }
         }
@@ -405,6 +416,7 @@ Here is the generated IR after loop permutation.
 
 
 
+
 Array Packing
 -------------
 Another important trick is array packing. This trick is to reorder the storage dimension of the
@@ -465,7 +477,7 @@ the corresponding value from the packed array.
 
  .. code-block:: none
 
-    Opt4: 0.060794
+    Opt4: 0.066516
 
 
 
@@ -487,22 +499,24 @@ Here is the generated IR after array packing.
 
  .. code-block:: none
 
-    // attr [packedB] storage_scope = "global"
-    allocate packedB[float32x32 * 32768]
-    parallel (x, 0, 32) {
-      for (y, 0, 1024) {
-        packedB[ramp(((x*32768) + (y*32)), 1, 32)] = B[ramp(((y*1024) + (x*32)), 1, 32)]
-      }
-    }
-    for (x.outer, 0, 32) {
-      for (y.outer, 0, 32) {
-        for (x.inner.init, 0, 32) {
-          C[ramp((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)), 1, 32)] = x32(0f)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [packedB] storage_scope = "global"
+      allocate packedB[float32x32 * 32768]
+      parallel (x, 0, 32) {
+        for (y, 0, 1024) {
+          packedB[ramp(((x*32768) + (y*32)), 1, 32)] = B[ramp(((y*1024) + (x*32)), 1, 32)]
         }
-        for (k.outer, 0, 256) {
-          for (x.inner, 0, 32) {
-            for (k.inner, 0, 4) {
-              C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = (C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (x32(A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + (k.inner*32)), 1, 32)]))
+      }
+      for (x.outer, 0, 32) {
+        for (y.outer, 0, 32) {
+          for (x.inner.init, 0, 32) {
+            C[ramp((((x.outer*32768) + (x.inner.init*1024)) + (y.outer*32)), 1, 32)] = x32(0f)
+          }
+          for (k.outer, 0, 256) {
+            for (x.inner, 0, 32) {
+              for (k.inner, 0, 4) {
+                C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = (C[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (x32(A[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + (k.inner*32)), 1, 32)]))
+              }
             }
           }
         }
@@ -512,6 +526,7 @@ Here is the generated IR after array packing.
 
 
 
+
 Write cache for blocks
 ----------------------
 After blocking, the program will write result to C block by block, the access pattern
@@ -566,7 +581,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.060997
+    Opt5: 0.061546
 
 
 
@@ -588,31 +603,33 @@ Here is the generated IR after blocking.
 
  .. code-block:: none
 
-    // attr [packedB] storage_scope = "global"
-    allocate packedB[float32x32 * 32768]
-    // attr [C.global] storage_scope = "global"
-    allocate C.global[float32 * 1024]
-    parallel (x, 0, 32) {
-      for (y, 0, 1024) {
-        packedB[ramp(((x*32768) + (y*32)), 1, 32)] = B[ramp(((y*1024) + (x*32)), 1, 32)]
-      }
-    }
-    for (x.outer, 0, 32) {
-      for (y.outer, 0, 32) {
-        for (x.c.init, 0, 32) {
-          C.global[ramp((x.c.init*32), 1, 32)] = x32(0f)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [packedB] storage_scope = "global"
+      allocate packedB[float32x32 * 32768]
+      // attr [C.global] storage_scope = "global"
+      allocate C.global[float32 * 1024]
+      parallel (x, 0, 32) {
+        for (y, 0, 1024) {
+          packedB[ramp(((x*32768) + (y*32)), 1, 32)] = B[ramp(((y*1024) + (x*32)), 1, 32)]
         }
-        for (k.outer, 0, 256) {
-          for (x.c, 0, 32) {
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))])*packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)]))
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)]))
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)]))
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)]))
+      }
+      for (x.outer, 0, 32) {
+        for (y.outer, 0, 32) {
+          for (x.c.init, 0, 32) {
+            C.global[ramp((x.c.init*32), 1, 32)] = x32(0f)
           }
-        }
-        for (x.inner, 0, 32) {
-          for (y.inner, 0, 32) {
-            C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = C.global[((x.inner*32) + y.inner)]
+          for (k.outer, 0, 256) {
+            for (x.c, 0, 32) {
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))])*packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)]))
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)]))
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)]))
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)]))
+            }
+          }
+          for (x.inner, 0, 32) {
+            for (y.inner, 0, 32) {
+              C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = C.global[((x.inner*32) + y.inner)]
+            }
           }
         }
       }
@@ -621,6 +638,7 @@ Here is the generated IR after blocking.
 
 
 
+
 Parallel
 --------
 Furthermore, we can also utilize multi-core processors to do the thread-level parallelization.
@@ -673,7 +691,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.008228
+    Opt6: 0.008298
 
 
 
@@ -695,31 +713,33 @@ Here is the generated IR after parallelization.
 
  .. code-block:: none
 
-    // attr [packedB] storage_scope = "global"
-    allocate packedB[float32x32 * 32768]
-    parallel (x, 0, 32) {
-      for (y, 0, 1024) {
-        packedB[ramp(((x*32768) + (y*32)), 1, 32)] = B[ramp(((y*1024) + (x*32)), 1, 32)]
-      }
-    }
-    parallel (x.outer, 0, 32) {
-      // attr [C.global] storage_scope = "global"
-      allocate C.global[float32 * 1024]
-      for (y.outer, 0, 32) {
-        for (x.c.init, 0, 32) {
-          C.global[ramp((x.c.init*32), 1, 32)] = x32(0f)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [packedB] storage_scope = "global"
+      allocate packedB[float32x32 * 32768]
+      parallel (x, 0, 32) {
+        for (y, 0, 1024) {
+          packedB[ramp(((x*32768) + (y*32)), 1, 32)] = B[ramp(((y*1024) + (x*32)), 1, 32)]
         }
-        for (k.outer, 0, 256) {
-          for (x.c, 0, 32) {
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))])*packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)]))
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)]))
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)]))
-            C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)]))
+      }
+      parallel (x.outer, 0, 32) {
+        // attr [C.global] storage_scope = "global"
+        allocate C.global[float32 * 1024]
+        for (y.outer, 0, 32) {
+          for (x.c.init, 0, 32) {
+            C.global[ramp((x.c.init*32), 1, 32)] = x32(0f)
           }
-        }
-        for (x.inner, 0, 32) {
-          for (y.inner, 0, 32) {
-            C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = C.global[((x.inner*32) + y.inner)]
+          for (k.outer, 0, 256) {
+            for (x.c, 0, 32) {
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))])*packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)]))
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)]))
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)]))
+              C.global[ramp((x.c*32), 1, 32)] = (C.global[ramp((x.c*32), 1, 32)] + (x32(A[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)])*packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)]))
+            }
+          }
+          for (x.inner, 0, 32) {
+            for (y.inner, 0, 32) {
+              C[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = C.global[((x.inner*32) + y.inner)]
+            }
           }
         }
       }
@@ -728,6 +748,7 @@ Here is the generated IR after parallelization.
 
 
 
+
 Summary
 -------
 After applying the above simple optimizations with only 18 lines of code,
diff --git a/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
index 736dde1..7885a69 100644
--- a/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,9 +5,9 @@
 
 Computation times
 =================
-**00:25.623** total execution time for **tutorials_optimize** files:
+**00:26.942** total execution time for **tutorials_optimize** files:
 
-- **00:23.853**: :ref:`sphx_glr_tutorials_optimize_opt_gemm.py` (``opt_gemm.py``)
-- **00:00.912**: :ref:`sphx_glr_tutorials_optimize_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
-- **00:00.705**: :ref:`sphx_glr_tutorials_optimize_opt_conv_cuda.py` (``opt_conv_cuda.py``)
-- **00:00.153**: :ref:`sphx_glr_tutorials_optimize_opt_matmul_auto_tensorcore.py` (``opt_matmul_auto_tensorcore.py``)
+- **00:25.096**: :ref:`sphx_glr_tutorials_optimize_opt_gemm.py` (``opt_gemm.py``)
+- **00:00.925**: :ref:`sphx_glr_tutorials_optimize_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
+- **00:00.770**: :ref:`sphx_glr_tutorials_optimize_opt_conv_cuda.py` (``opt_conv_cuda.py``)
+- **00:00.152**: :ref:`sphx_glr_tutorials_optimize_opt_matmul_auto_tensorcore.py` (``opt_matmul_auto_tensorcore.py``)
diff --git a/docs/_sources/tutorials/relay_quick_start.rst.txt b/docs/_sources/tutorials/relay_quick_start.rst.txt
index f5d4ad3..cabe8be 100644
--- a/docs/_sources/tutorials/relay_quick_start.rst.txt
+++ b/docs/_sources/tutorials/relay_quick_start.rst.txt
@@ -223,7 +223,7 @@ in this example. Then the machine code will be generated as the module library.
 
  .. code-block:: none
 
-
    ...1%, 0.01 MB, 52 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 104 KB/s, 0 seconds passed
    ...5%, 0.02 MB, 156 KB/s, 0 seconds passed
    ...7%, 0.03 MB, 207 KB/s, 0 seconds passed
    ...9%, 0.04 MB, 259 KB/s, 0 seconds passed
    ...11%, 0.05 MB, 310 KB/s, 0 seconds passed
    ...13%, 0.05 MB, 362 KB/s, 0 seconds passed
    ...15%, 0.06 MB, 411 KB/s, 0 seconds passed
    ...17%, 0.07 MB, 462 KB/s, 0 seconds passed
    ...19%, 0.08 MB, 513 KB/s, 0 seconds passed
    ...21%, 0.09 MB, 564 KB/s, 0 seconds passed
    ...23%, 0.09 MB, 613 KB/s, 0 seconds passed
    ...25%, 0.10 MB, 664 KB/s, 0 seconds passed
    ...27%, 0.11 MB, 711 KB/s, 0 seconds passed
    ...29%, 0.12 MB, 762 KB/s, 0 seconds passed
    ...31%, 0.12 MB, 810 KB/s, 0 seconds passed
    ...33%, 0.13 MB, 860 KB/s, 0 seconds passed
    ...35%, 0.14 MB, 908 KB/s, 0 seconds passed
    ...37%, 0.15 MB, 958 KB/s, 0 seconds passed
    ...39%, 0.16 MB, 1007 KB/s, 0 seconds passed
    ...41%, 0.16 MB, 1056 KB/s, 0 seconds passed
    ...43%, 0.17 MB, 1103 KB/s, 0 seconds passed
    ...45%, 0.18 MB, 1152 KB/s, 0 seconds passed
    ...47%, 0.19 MB, 1200 KB/s, 0 seconds passed
    ...49%, 0.20 MB, 1250 KB/s, 0 seconds passed
    ...51%, 0.20 MB, 1296 KB/s, 0 seconds passed
    ...53%, 0.21 MB, 1345 KB/s, 0 seconds passed
    ...55%, 0.22 MB, 1392 KB/s, 0 seconds passed
    ...57%, 0.23 MB, 1441 KB/s, 0 seconds passed
    ...59%, 0.23 MB, 1487 KB/s, 0 seconds passed
    ...61%, 0.24 MB, 1536 KB/s, 0 seconds passed
    ...63%, 0.25 MB, 1581 KB/s, 0 seconds passed
    ...65%, 0.26 MB, 1630 KB/s, 0 seconds passed
    ...67%, 0.27 MB, 1676 KB/s, 0 seconds passed
    ...69%, 0.27 MB, 1724 KB/s, 0 seconds passed
    ...71%, 0.28 MB, 1768 KB/s, 0 seconds passed
    ...73%, 0.29 MB, 1816 KB/s, 0 seconds passed
    ...75%, 0.30 MB, 1858 KB/s, 0 seconds passed
    ...77%, 0.30 MB, 1906 KB/s, 0 seconds passed
    ...79%, 0.31 MB, 1954 KB/s, 0 seconds passed
    ...81%, 0.32 MB, 2001 KB/s, 0 seconds passed
    ...83%, 0.33 MB, 2049 KB/s, 0 seconds passed
    ...85%, 0.34 MB, 2096 KB/s, 0 seconds passed
    ...87%, 0.34 MB, 2139 KB/s, 0 seconds passed
    ...89%, 0.35 MB, 2187 KB/s, 0 seconds passed
    ...91%, 0.36 MB, 2234 KB/s, 0 seconds passed
    ...93%, 0.37 MB, 2281 KB/s, 0 seconds passed
    ...95%, 0.38 MB, 2326 KB/s, 0 seconds passed
    ...97%, 0.38 MB, 2373 KB/s, 0 seconds passed
    ...99%, 0.39 MB, 2417 KB/s, 0 seconds passed
    ...100%, 0.40 MB, 2463 KB/s, 0 seconds passed
+
    ...1%, 0.01 MB, 51 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 102 KB/s, 0 seconds passed
    ...5%, 0.02 MB, 153 KB/s, 0 seconds passed
    ...7%, 0.03 MB, 203 KB/s, 0 seconds passed
    ...9%, 0.04 MB, 254 KB/s, 0 seconds passed
    ...11%, 0.05 MB, 303 KB/s, 0 seconds passed
    ...13%, 0.05 MB, 354 KB/s, 0 seconds passed
    ...15%, 0.06 MB, 403 KB/s, 0 seconds passed
    ...17%, 0.07 MB, 453 KB/s, 0 seconds passed
    ...19%, 0.08 MB, 501 KB/s, 0 seconds passed
    ...21%, 0.09 MB, 551 KB/s, 0 seconds passed
    ...23%, 0.09 MB, 599 KB/s, 0 seconds passed
    ...25%, 0.10 MB, 648 KB/s, 0 seconds passed
    ...27%, 0.11 MB, 695 KB/s, 0 seconds passed
    ...29%, 0.12 MB, 744 KB/s, 0 seconds passed
    ...31%, 0.12 MB, 793 KB/s, 0 seconds passed
    ...33%, 0.13 MB, 841 KB/s, 0 seconds passed
    ...35%, 0.14 MB, 887 KB/s, 0 seconds passed
    ...37%, 0.15 MB, 935 KB/s, 0 seconds passed
    ...39%, 0.16 MB, 984 KB/s, 0 seconds passed
    ...41%, 0.16 MB, 1032 KB/s, 0 seconds passed
    ...43%, 0.17 MB, 1077 KB/s, 0 seconds passed
    ...45%, 0.18 MB, 1125 KB/s, 0 seconds passed
    ...47%, 0.19 MB, 1173 KB/s, 0 seconds passed
    ...49%, 0.20 MB, 1221 KB/s, 0 seconds passed
    ...51%, 0.20 MB, 1265 KB/s, 0 seconds passed
    ...53%, 0.21 MB, 1313 KB/s, 0 seconds passed
    ...55%, 0.22 MB, 1359 KB/s, 0 seconds passed
    ...57%, 0.23 MB, 1407 KB/s, 0 seconds passed
    ...59%, 0.23 MB, 1451 KB/s, 0 seconds passed
    ...61%, 0.24 MB, 1499 KB/s, 0 seconds passed
    ...63%, 0.25 MB, 1544 KB/s, 0 seconds passed
    ...65%, 0.26 MB, 1592 KB/s, 0 seconds passed
    ...67%, 0.27 MB, 1635 KB/s, 0 seconds passed
    ...69%, 0.27 MB, 1682 KB/s, 0 seconds passed
    ...71%, 0.28 MB, 1727 KB/s, 0 seconds passed
    ...73%, 0.29 MB, 1774 KB/s, 0 seconds passed
    ...75%, 0.30 MB, 1817 KB/s, 0 seconds passed
    ...77%, 0.30 MB, 1864 KB/s, 0 seconds passed
    ...79%, 0.31 MB, 1908 KB/s, 0 seconds passed
    ...81%, 0.32 MB, 1955 KB/s, 0 seconds passed
    ...83%, 0.33 MB, 1997 KB/s, 0 seconds passed
    ...85%, 0.34 MB, 2043 KB/s, 0 seconds passed
    ...87%, 0.34 MB, 2087 KB/s, 0 seconds passed
    ...89%, 0.35 MB, 2133 KB/s, 0 seconds passed
    ...91%, 0.36 MB, 2177 KB/s, 0 seconds passed
    ...93%, 0.37 MB, 2223 KB/s, 0 seconds passed
    ...95%, 0.38 MB, 2269 KB/s, 0 seconds passed
    ...97%, 0.38 MB, 2315 KB/s, 0 seconds passed
    ...99%, 0.39 MB, 2360 KB/s, 0 seconds passed
    ...100%, 0.40 MB, 2405 KB/s, 0 seconds passed
     Cannot find config for target=cuda -model=unknown, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 56, 56), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=cuda -model=unknown, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=cuda -model=unknown, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 14, 14), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
diff --git a/docs/_sources/tutorials/sg_execution_times.rst.txt b/docs/_sources/tutorials/sg_execution_times.rst.txt
index 54c3666..bc33262 100644
--- a/docs/_sources/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
 
 Computation times
 =================
-**00:09.413** total execution time for **tutorials** files:
+**00:09.438** total execution time for **tutorials** files:
 
-- **00:08.906**: :ref:`sphx_glr_tutorials_relay_quick_start.py` (``relay_quick_start.py``)
-- **00:00.354**: :ref:`sphx_glr_tutorials_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
-- **00:00.154**: :ref:`sphx_glr_tutorials_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
+- **00:08.993**: :ref:`sphx_glr_tutorials_relay_quick_start.py` (``relay_quick_start.py``)
+- **00:00.330**: :ref:`sphx_glr_tutorials_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
+- **00:00.115**: :ref:`sphx_glr_tutorials_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
diff --git a/docs/_sources/tutorials/topi/intro_topi.rst.txt b/docs/_sources/tutorials/topi/intro_topi.rst.txt
index 76e6d29..5b5f7da 100644
--- a/docs/_sources/tutorials/topi/intro_topi.rst.txt
+++ b/docs/_sources/tutorials/topi/intro_topi.rst.txt
@@ -72,18 +72,21 @@ and to examine the IR code in human readable format, we can do
 
  .. code-block:: none
 
-    // attr [B] storage_scope = "global"
-    allocate B[float32 * n]
-    for (i, 0, n) {
-      B[i] = 0f
-      for (k, 0, m) {
-        B[i] = (B[i] + A[((i*stride) + (k*stride))])
+    PrimFunc([A]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [B] storage_scope = "global"
+      allocate B[float32 * n]
+      for (i, 0, n) {
+        B[i] = 0f
+        for (k, 0, m) {
+          B[i] = (B[i] + A[((i*stride) + (k*stride))])
+        }
       }
     }
 
 
 
 
+
 However, for such a common operation we had to define the reduce axis ourselves as well as explicit computation with
 :code:`te.compute`. Imagine for more complicated operations how much details we need to provide.
 Fortunately, we can replace those two lines with simple :code:`topi.sum` much like :code:`numpy.sum`
@@ -106,18 +109,21 @@ Fortunately, we can replace those two lines with simple :code:`topi.sum` much li
 
  .. code-block:: none
 
-    // attr [A_red] storage_scope = "global"
-    allocate A_red[float32 * n]
-    for (ax0, 0, n) {
-      A_red[ax0] = 0f
-      for (k1, 0, m) {
-        A_red[ax0] = (A_red[ax0] + A[((ax0*stride) + (k1*stride))])
+    PrimFunc([A]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [A_red] storage_scope = "global"
+      allocate A_red[float32 * n]
+      for (ax0, 0, n) {
+        A_red[ax0] = 0f
+        for (k1, 0, m) {
+          A_red[ax0] = (A_red[ax0] + A[((ax0*stride) + (k1*stride))])
+        }
       }
     }
 
 
 
 
+
 Numpy-style operator overloading
 --------------------------------
 We can add two tensors using :code:`topi.broadcast_add` that have correct (broadcastable with specific) shapes.
@@ -170,24 +176,27 @@ we can schedule the following series of operations ending with :code:`topi.sum`
 
  .. code-block:: none
 
-    // attr [T_divide_red] storage_scope = "global"
-    allocate T_divide_red[float32 * 1]
-    // attr [iter_var(threadIdx.x, range(min=0, ext=1024), threadIdx.x)] thread_extent = 1024
-    // attr [T_divide_red.rf] storage_scope = "local"
-    allocate T_divide_red.rf[float32 * 1]
-    // attr [reduce_temp0] storage_scope = "local"
-    allocate reduce_temp0[float32 * 1]
-    T_divide_red.rf[0] = 0f
-    for (k0.k1.fused.k2.fused.outer, 0, 10) {
-      if (likely((((((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)))) {
-        T_divide_red.rf[0] = (T_divide_red.rf[0] + (((a[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)] + b[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)]) + (a[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)]*b[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)]))*0.5f))
+    PrimFunc([a, b]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [T_divide_red] storage_scope = "global"
+      allocate T_divide_red[float32 * 1]
+      // attr [iter_var(threadIdx.x, range(min=0, ext=1024), threadIdx.x)] thread_extent = 1024
+      // attr [T_divide_red.rf] storage_scope = "local"
+      allocate T_divide_red.rf[float32 * 1]
+      // attr [reduce_temp0] storage_scope = "local"
+      allocate reduce_temp0[float32 * 1]
+      T_divide_red.rf[0] = 0f
+      for (k0.k1.fused.k2.fused.outer, 0, 10) {
+        if (likely((((((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)))) {
+          T_divide_red.rf[0] = (T_divide_red.rf[0] + (((a[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)] + b[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)]) + (a[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)]*b[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)]))*0.5f))
+        }
+      }
+      // attr [comm_reducer(result=[(x + y)], lhs=[x], rhs=[y], identity_element=[0f])] reduce_scope = reinterpret((uint64)0)
+      tvm_thread_allreduce((uint32)1, T_divide_red.rf[0], (bool)1, reduce_temp0, threadIdx.x)
+      if ((threadIdx.x == 0)) {
+        T_divide_red[0] = reduce_temp0[0]
       }
     }
-    // attr [comm_reducer(result=[(x + y)], lhs=[x], rhs=[y], identity_element=[0f])] reduce_scope = reinterpret((uint64)0)
-    tvm_thread_allreduce((uint32)1, T_divide_red.rf[0], (bool)1, reduce_temp0, threadIdx.x)
-    if ((threadIdx.x == 0)) {
-      T_divide_red[0] = reduce_temp0[0]
-    }
+
 
 
 
@@ -210,7 +219,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, 0x11b9187f0), stage(b, 0x11b7670d0), stage(T_add, 0x11b7baf30), stage(T_multiply, 0x1081b3010), stage(T_elemwise_sum, 0x11a19aab0), stage(T_divide, 0x11b78c420), stage(T_divide_red.rf, 0x117ddf470), stage(T_divide_red, 0x11b919310)]
+    [stage(a, 0x11af95c90), stage(b, 0x10cb2e730), stage(T_add, 0x110120e00), stage(T_multiply, 0x1178fde20), stage(T_elemwise_sum, 0x10ca51780), stage(T_divide, 0x11aaebd90), stage(T_divide_red.rf, 0x11b0441b0), stage(T_divide_red, 0x11b040c00)]
 
 
 
@@ -259,43 +268,46 @@ TOPI also provides common neural nets operations such as _softmax_ with optimize
 
  .. code-block:: none
 
-    // attr [T_softmax_maxelem] storage_scope = "global"
-    allocate T_softmax_maxelem[float32 * 512]
-    // attr [T_softmax_exp] storage_scope = "global"
-    allocate T_softmax_exp[float32 * 262144]
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
-    T_softmax_maxelem[blockIdx.x] = -3.40282e+38f
-    for (k, 0, 512) {
-      T_softmax_maxelem[blockIdx.x] = max(T_softmax_maxelem[blockIdx.x], tarray[((blockIdx.x*512) + k)])
-    }
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
-    for (i1, 0, 512) {
-      T_softmax_exp[((blockIdx.x*512) + i1)] = exp((tarray[((blockIdx.x*512) + i1)] - T_softmax_maxelem[blockIdx.x]))
-    }
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
-    // attr [T_softmax_expsum.rf] storage_scope = "local"
-    allocate T_softmax_expsum.rf[float32 * 1]
-    // attr [reduce_temp0] storage_scope = "local"
-    allocate reduce_temp0[float32 * 1]
-    // attr [iter_var(threadIdx.x, range(min=0, ext=64), threadIdx.x)] thread_extent = 64
-    T_softmax_expsum.rf[0] = 0f
-    for (k.outer, 0, 8) {
-      T_softmax_expsum.rf[0] = (T_softmax_expsum.rf[0] + T_softmax_exp[(((blockIdx.x*512) + (k.outer*64)) + threadIdx.x)])
-    }
-    // attr [comm_reducer(result=[(x + y)], lhs=[x], rhs=[y], identity_element=[0f])] reduce_scope = reinterpret((uint64)0)
-    tvm_thread_allreduce((uint32)1, T_softmax_expsum.rf[0], (bool)1, reduce_temp0, threadIdx.x)
-    if ((threadIdx.x == 0)) {
-      T_softmax_maxelem[blockIdx.x] = reduce_temp0[0]
-    }
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
-    // attr [iter_var(threadIdx.x, range(min=0, ext=64), threadIdx.x)] thread_extent = 64
-    for (i1.inner, 0, 8) {
-      T_softmax_exp[(((blockIdx.x*512) + (threadIdx.x*8)) + i1.inner)] = (T_softmax_exp[(((blockIdx.x*512) + (threadIdx.x*8)) + i1.inner)]/T_softmax_maxelem[blockIdx.x])
+    PrimFunc([tarray]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [T_softmax_maxelem] storage_scope = "global"
+      allocate T_softmax_maxelem[float32 * 512]
+      // attr [T_softmax_exp] storage_scope = "global"
+      allocate T_softmax_exp[float32 * 262144]
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
+      T_softmax_maxelem[blockIdx.x] = -3.40282e+38f
+      for (k, 0, 512) {
+        T_softmax_maxelem[blockIdx.x] = max(T_softmax_maxelem[blockIdx.x], tarray[((blockIdx.x*512) + k)])
+      }
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
+      for (i1, 0, 512) {
+        T_softmax_exp[((blockIdx.x*512) + i1)] = exp((tarray[((blockIdx.x*512) + i1)] - T_softmax_maxelem[blockIdx.x]))
+      }
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
+      // attr [T_softmax_expsum.rf] storage_scope = "local"
+      allocate T_softmax_expsum.rf[float32 * 1]
+      // attr [reduce_temp0] storage_scope = "local"
+      allocate reduce_temp0[float32 * 1]
+      // attr [iter_var(threadIdx.x, range(min=0, ext=64), threadIdx.x)] thread_extent = 64
+      T_softmax_expsum.rf[0] = 0f
+      for (k.outer, 0, 8) {
+        T_softmax_expsum.rf[0] = (T_softmax_expsum.rf[0] + T_softmax_exp[(((blockIdx.x*512) + (k.outer*64)) + threadIdx.x)])
+      }
+      // attr [comm_reducer(result=[(x + y)], lhs=[x], rhs=[y], identity_element=[0f])] reduce_scope = reinterpret((uint64)0)
+      tvm_thread_allreduce((uint32)1, T_softmax_expsum.rf[0], (bool)1, reduce_temp0, threadIdx.x)
+      if ((threadIdx.x == 0)) {
+        T_softmax_maxelem[blockIdx.x] = reduce_temp0[0]
+      }
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 512
+      // attr [iter_var(threadIdx.x, range(min=0, ext=64), threadIdx.x)] thread_extent = 64
+      for (i1.inner, 0, 8) {
+        T_softmax_exp[(((blockIdx.x*512) + (threadIdx.x*8)) + i1.inner)] = (T_softmax_exp[(((blockIdx.x*512) + (threadIdx.x*8)) + i1.inner)]/T_softmax_maxelem[blockIdx.x])
+      }
     }
 
 
 
 
+
 Fusing convolutions
 -------------------
 We can fuse :code:`topi.nn.conv2d` and :code:`topi.nn.relu` together.
@@ -331,262 +343,265 @@ We can fuse :code:`topi.nn.conv2d` and :code:`topi.nn.relu` together.
 
  .. code-block:: none
 
-    // attr [compute] storage_scope = "global"
-    allocate compute[float32 * 501760]
-    // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = 5
-    // attr [compute] storage_scope = "local"
-    allocate compute[float32 * 14]
-    // attr [pad_temp.shared] storage_scope = "shared"
-    allocate pad_temp.shared[float32 * 112]
-    // attr [placeholder.shared] storage_scope = "shared"
-    allocate placeholder.shared[float32 * 2]
-    // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = 224
-    // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 2
-    // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-    // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-    // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-    compute[0] = 0f
-    compute[2] = 0f
-    compute[4] = 0f
-    compute[6] = 0f
-    compute[8] = 0f
-    compute[10] = 0f
-    compute[12] = 0f
-    compute[1] = 0f
-    compute[3] = 0f
-    compute[5] = 0f
-    compute[7] = 0f
-    compute[9] = 0f
-    compute[11] = 0f
-    compute[13] = 0f
-    for (rc.outer, 0, 3) {
-      for (ry.outer, 0, 5) {
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (2 <= ((blockIdx.x*112) + (threadIdx.x*7)))), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 450)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x*7)))), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 449)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 448)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        if (likely((threadIdx.x < 2))) {
+    PrimFunc([placeholder, placeholder]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [compute] storage_scope = "global"
+      allocate compute[float32 * 501760]
+      // attr [iter_var(blockIdx.z, , blockIdx.z)] thread_extent = 5
+      // attr [compute] storage_scope = "local"
+      allocate compute[float32 * 14]
+      // attr [pad_temp.shared] storage_scope = "shared"
+      allocate pad_temp.shared[float32 * 112]
+      // attr [placeholder.shared] storage_scope = "shared"
+      allocate placeholder.shared[float32 * 2]
+      // attr [iter_var(blockIdx.y, , blockIdx.y)] thread_extent = 224
+      // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 2
+      // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+      // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+      // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
+      compute[0] = 0f
+      compute[2] = 0f
+      compute[4] = 0f
+      compute[6] = 0f
+      compute[8] = 0f
+      compute[10] = 0f
+      compute[12] = 0f
+      compute[1] = 0f
+      compute[3] = 0f
+      compute[5] = 0f
+      compute[7] = 0f
+      compute[9] = 0f
+      compute[11] = 0f
+      compute[13] = 0f
+      for (rc.outer, 0, 3) {
+        for (ry.outer, 0, 5) {
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
+          pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (2 <= ((blockIdx.x*112) + (threadIdx.x*7)))), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 450)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x*7)))), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 449)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 448)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
           if (likely((threadIdx.x < 2))) {
             if (likely((threadIdx.x < 2))) {
               if (likely((threadIdx.x < 2))) {
                 if (likely((threadIdx.x < 2))) {
                   if (likely((threadIdx.x < 2))) {
-                    if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
-                      placeholder.shared[threadIdx.x] = placeholder[((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5))]
+                    if (likely((threadIdx.x < 2))) {
+                      if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
+                        placeholder.shared[threadIdx.x] = placeholder[((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5))]
+                      }
                     }
                   }
                 }
               }
             }
           }
-        }
-        compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
-        compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
-        compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
-        compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
-        compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
-        compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
-        compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
-        compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
-        compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
-        compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
-        compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
-        compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
-        compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
-        compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x*7)))), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 449)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 448)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        if (likely((threadIdx.x < 2))) {
+          compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
+          compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
+          compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
+          compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
+          compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
+          compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
+          compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
+          compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
+          compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
+          compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
+          compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
+          compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
+          compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
+          compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
+          pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x*7)))), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 449)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 448)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
           if (likely((threadIdx.x < 2))) {
             if (likely((threadIdx.x < 2))) {
               if (likely((threadIdx.x < 2))) {
                 if (likely((threadIdx.x < 2))) {
                   if (likely((threadIdx.x < 2))) {
-                    if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
-                      placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 1)]
+                    if (likely((threadIdx.x < 2))) {
+                      if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
+                        placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 1)]
+                      }
                     }
                   }
                 }
               }
             }
           }
-        }
-        compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
-        compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
-        compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
-        compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
-        compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
-        compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
-        compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
-        compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
-        compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
-        compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
-        compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
-        compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
-        compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
-        compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 448)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 442)], 0f)
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        if (likely((threadIdx.x < 2))) {
+          compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
+          compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
+          compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
+          compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
+          compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
+          compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
+          compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
+          compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
+          compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
+          compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
+          compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
+          compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
+          compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
+          compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
+          pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 448)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 442)], 0f)
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
           if (likely((threadIdx.x < 2))) {
             if (likely((threadIdx.x < 2))) {
               if (likely((threadIdx.x < 2))) {
                 if (likely((threadIdx.x < 2))) {
                   if (likely((threadIdx.x < 2))) {
-                    if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
-                      placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 2)]
+                    if (likely((threadIdx.x < 2))) {
+                      if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
+                        placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 2)]
+                      }
                     }
                   }
                 }
               }
             }
           }
-        }
-        compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
-        compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
-        compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
-        compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
-        compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
-        compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
-        compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
-        compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
-        compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
-        compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
-        compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
-        compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
-        compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
-        compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 442)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x*7)) < 217)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 441)], 0f)
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        if (likely((threadIdx.x < 2))) {
+          compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
+          compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
+          compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
+          compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
+          compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
+          compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
+          compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
+          compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
+          compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
+          compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
+          compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
+          compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
+          compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
+          compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
+          pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 447)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 442)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x*7)) < 217)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 441)], 0f)
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
           if (likely((threadIdx.x < 2))) {
             if (likely((threadIdx.x < 2))) {
               if (likely((threadIdx.x < 2))) {
                 if (likely((threadIdx.x < 2))) {
                   if (likely((threadIdx.x < 2))) {
-                    if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
-                      placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 3)]
+                    if (likely((threadIdx.x < 2))) {
+                      if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
+                        placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 3)]
+                      }
                     }
                   }
                 }
               }
             }
           }
-        }
-        compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
-        compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
-        compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
-        compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
-        compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
-        compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
-        compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
-        compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
-        compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
-        compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
-        compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
-        compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
-        compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
-        compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 442)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x*7)) < 217)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 441)], 0f)
-        pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x*7)) < 216)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 440)], 0f)
-        // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
-        // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
-        // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
-        if (likely((threadIdx.x < 2))) {
+          compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
+          compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
+          compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
+          compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
+          compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
+          compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
+          compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
+          compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
+          compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
+          compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
+          compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
+          compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
+          compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
+          compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
+          pad_temp.shared[(threadIdx.x*7)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 446)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 1)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 445)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 2)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 444)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 3)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 443)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 4)] = tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 442)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 5)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x*7)) < 217)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 441)], 0f)
+          pad_temp.shared[((threadIdx.x*7) + 6)] = tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x*7)) < 216)), placeholder[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x*7)) - 440)], 0f)
+          // attr [iter_var(threadIdx.z, , threadIdx.z)] thread_extent = 1
+          // attr [iter_var(threadIdx.y, , threadIdx.y)] thread_extent = 1
+          // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 16
           if (likely((threadIdx.x < 2))) {
             if (likely((threadIdx.x < 2))) {
               if (likely((threadIdx.x < 2))) {
                 if (likely((threadIdx.x < 2))) {
                   if (likely((threadIdx.x < 2))) {
-                    if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
-                      placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 4)]
+                    if (likely((threadIdx.x < 2))) {
+                      if (likely((((blockIdx.z*2) + threadIdx.x) < 10))) {
+                        placeholder.shared[threadIdx.x] = placeholder[(((((blockIdx.z*150) + (threadIdx.x*75)) + (rc.outer*25)) + (ry.outer*5)) + 4)]
+                      }
                     }
                   }
                 }
               }
             }
           }
+          compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
+          compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
+          compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
+          compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
+          compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
+          compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
+          compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
+          compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
+          compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
+          compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
+          compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
+          compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
+          compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
+          compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
         }
-        compute[0] = (compute[0] + (pad_temp.shared[threadIdx.x]*placeholder.shared[0]))
-        compute[2] = (compute[2] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[0]))
-        compute[4] = (compute[4] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[0]))
-        compute[6] = (compute[6] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[0]))
-        compute[8] = (compute[8] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[0]))
-        compute[10] = (compute[10] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[0]))
-        compute[12] = (compute[12] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[0]))
-        compute[1] = (compute[1] + (pad_temp.shared[threadIdx.x]*placeholder.shared[1]))
-        compute[3] = (compute[3] + (pad_temp.shared[(threadIdx.x + 16)]*placeholder.shared[1]))
-        compute[5] = (compute[5] + (pad_temp.shared[(threadIdx.x + 32)]*placeholder.shared[1]))
-        compute[7] = (compute[7] + (pad_temp.shared[(threadIdx.x + 48)]*placeholder.shared[1]))
-        compute[9] = (compute[9] + (pad_temp.shared[(threadIdx.x + 64)]*placeholder.shared[1]))
-        compute[11] = (compute[11] + (pad_temp.shared[(threadIdx.x + 80)]*placeholder.shared[1]))
-        compute[13] = (compute[13] + (pad_temp.shared[(threadIdx.x + 96)]*placeholder.shared[1]))
       }
+      compute[((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x)] = max(compute[0], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 16)] = max(compute[2], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 32)] = max(compute[4], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 48)] = max(compute[6], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 64)] = max(compute[8], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 80)] = max(compute[10], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 96)] = max(compute[12], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50176)] = max(compute[1], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50192)] = max(compute[3], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50208)] = max(compute[5], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50224)] = max(compute[7], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50240)] = max(compute[9], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50256)] = max(compute[11], 0f)
+      compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50272)] = max(compute[13], 0f)
     }
-    compute[((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x)] = max(compute[0], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 16)] = max(compute[2], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 32)] = max(compute[4], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 48)] = max(compute[6], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 64)] = max(compute[8], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 80)] = max(compute[10], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 96)] = max(compute[12], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50176)] = max(compute[1], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50192)] = max(compute[3], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50208)] = max(compute[5], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50224)] = max(compute[7], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50240)] = max(compute[9], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50256)] = max(compute[11], 0f)
-    compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50272)] = max(compute[13], 0f)
+
 
 
 
diff --git a/docs/_sources/tutorials/topi/sg_execution_times.rst.txt b/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
index efb74d2..a7fc5e9 100644
--- a/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:00.484** total execution time for **tutorials_topi** files:
+**00:00.546** total execution time for **tutorials_topi** files:
 
-- **00:00.484**: :ref:`sphx_glr_tutorials_topi_intro_topi.py` (``intro_topi.py``)
+- **00:00.546**: :ref:`sphx_glr_tutorials_topi_intro_topi.py` (``intro_topi.py``)
diff --git a/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 8ff478f..6fe438a 100644
--- a/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:03.231** total execution time for **vta_tutorials_autotvm** files:
+**00:04.202** total execution time for **vta_tutorials_autotvm** files:
 
-- **00:03.231**: :ref:`sphx_glr_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
+- **00:04.202**: :ref:`sphx_glr_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
diff --git a/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt b/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
index f1dd338..3ccb0c8 100644
--- a/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
+++ b/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
@@ -481,8 +481,8 @@ Finally, we launch tuning jobs and evaluate the end-to-end performance.
  .. code-block:: none
 
     Extract tasks...
-
    ...1%, 0.01 MB, 46 KB/s, 0 seconds passed
    ...2%, 0.02 MB, 92 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 139 KB/s, 0 seconds passed
    ...4%, 0.03 MB, 183 KB/s, 0 seconds passed
    ...5%, 0.04 MB, 229 KB/s, 0 seconds passed
    ...6%, 0.05 MB, 273 KB/s, 0 seconds passed
    ...7%, 0.05 MB, 319 KB/s, 0 seconds passed
    ...8%, 0.06 MB, 364 KB/s, 0 seconds passed
    ...9%, 0.07 MB, 409 KB/s, 0 seconds passed
    ...10%, 0.08 MB, 452 KB/s, 0 seconds passed
    ...11%, 0.09 MB, 497 KB/s, 0 seconds passed
    ...13%, 0.09 MB, 540 KB/s, 0 seconds passed
    ...14%, 0.10 MB, 585 KB/s, 0 seconds passed
    ...15%, 0.11 MB, 628 KB/s, 0 seconds passed
    ...16%, 0.12 MB, 673 KB/s, 0 seconds passed
    ...17%, 0.12 MB, 716 KB/s, 0 seconds passed
    ...18%, 0.13 MB, 761 KB/s, 0 seconds passed
    ...19%, 0.14 MB, 803 KB/s, 0 seconds passed
    ...20%, 0.15 MB, 848 KB/s, 0 seconds passed
    ...21%, 0.16 MB, 888 KB/s, 0 seconds passed
    ...22%, 0.16 MB, 932 KB/s, 0 seconds passed
 
    ...23%, 0.17 MB, 974 KB/s, 0 seconds passed
    ...25%, 0.18 MB, 1018 KB/s, 0 seconds passed
    ...26%, 0.19 MB, 1060 KB/s, 0 seconds passed
    ...27%, 0.20 MB, 1104 KB/s, 0 seconds passed
    ...28%, 0.20 MB, 1144 KB/s, 0 seconds passed
    ...29%, 0.21 MB, 1188 KB/s, 0 seconds passed
    ...30%, 0.22 MB, 1227 KB/s, 0 seconds passed
    ...31%, 0.23 MB, 1270 KB/s, 0 seconds passed
    ...32%, 0.23 MB, 1313 KB/s, 0 seconds passed
    ...33%, 0.24 MB, 1356 KB/s, 0 seconds passed
    ...34%, 0.25 MB, 1396 KB/s, 0 seconds passed
    ...35%, 0.26 MB, 1439 KB/s, 0 seconds passed
    ...36%, 0.27 MB, 1479 KB/s, 0 seconds passed
    ...38%, 0.27 MB, 1522 KB/s, 0 seconds passed
    ...39%, 0.28 MB, 1561 KB/s, 0 seconds passed
    ...40%, 0.29 MB, 1604 KB/s, 0 seconds passed
    ...41%, 0.30 MB, 1645 KB/s, 0 seconds passed
    ...42%, 0.30 MB, 1687 KB/s, 0 seconds passed
    ...43%, 0.31 MB, 1724 KB/s, 0 seconds passed
    ...44%, 0.32 MB, 1766 KB/s, 0 seconds passed
    ...45%, 0.33 
 MB, 1807 KB/s, 0 seconds passed
    ...46%, 0.34 MB, 1849 KB/s, 0 seconds passed
    ...47%, 0.34 MB, 1885 KB/s, 0 seconds passed
    ...48%, 0.35 MB, 1927 KB/s, 0 seconds passed
    ...50%, 0.36 MB, 1968 KB/s, 0 seconds passed
    ...51%, 0.37 MB, 2010 KB/s, 0 seconds passed
    ...52%, 0.38 MB, 2050 KB/s, 0 seconds passed
    ...53%, 0.38 MB, 2092 KB/s, 0 seconds passed
    ...54%, 0.39 MB, 2129 KB/s, 0 seconds passed
    ...55%, 0.40 MB, 2170 KB/s, 0 seconds passed
    ...56%, 0.41 MB, 2210 KB/s, 0 seconds passed
    ...57%, 0.41 MB, 2251 KB/s, 0 seconds passed
    ...58%, 0.42 MB, 2287 KB/s, 0 seconds passed
    ...59%, 0.43 MB, 2329 KB/s, 0 seconds passed
    ...60%, 0.44 MB, 2370 KB/s, 0 seconds passed
    ...62%, 0.45 MB, 2411 KB/s, 0 seconds passed
    ...63%, 0.45 MB, 2450 KB/s, 0 seconds passed
    ...64%, 0.46 MB, 2491 KB/s, 0 seconds passed
    ...65%, 0.47 MB, 2525 KB/s, 0 seconds passed
    ...66%, 0.48 MB, 2566 KB/s, 0 seconds passed
    ...67%, 0.48 MB, 2604 KB/s, 0 
 seconds passed
    ...68%, 0.49 MB, 2645 KB/s, 0 seconds passed
    ...69%, 0.50 MB, 2685 KB/s, 0 seconds passed
    ...70%, 0.51 MB, 2726 KB/s, 0 seconds passed
    ...71%, 0.52 MB, 2760 KB/s, 0 seconds passed
    ...72%, 0.52 MB, 2801 KB/s, 0 seconds passed
    ...73%, 0.53 MB, 2839 KB/s, 0 seconds passed
    ...75%, 0.54 MB, 2879 KB/s, 0 seconds passed
    ...76%, 0.55 MB, 2911 KB/s, 0 seconds passed
    ...77%, 0.55 MB, 2952 KB/s, 0 seconds passed
    ...78%, 0.56 MB, 2992 KB/s, 0 seconds passed
    ...79%, 0.57 MB, 3032 KB/s, 0 seconds passed
    ...80%, 0.58 MB, 3072 KB/s, 0 seconds passed
    ...81%, 0.59 MB, 3112 KB/s, 0 seconds passed
    ...82%, 0.59 MB, 3146 KB/s, 0 seconds passed
    ...83%, 0.60 MB, 3186 KB/s, 0 seconds passed
    ...84%, 0.61 MB, 3225 KB/s, 0 seconds passed
    ...85%, 0.62 MB, 3265 KB/s, 0 seconds passed
    ...87%, 0.62 MB, 3301 KB/s, 0 seconds passed
    ...88%, 0.63 MB, 3341 KB/s, 0 seconds passed
    ...89%, 0.64 MB, 3377 KB/s, 0 seconds passed
  
   ...90%, 0.65 MB, 3417 KB/s, 0 seconds passed
    ...91%, 0.66 MB, 3456 KB/s, 0 seconds passed
    ...92%, 0.66 MB, 3495 KB/s, 0 seconds passed
    ...93%, 0.67 MB, 3526 KB/s, 0 seconds passed
    ...94%, 0.68 MB, 3566 KB/s, 0 seconds passed
    ...95%, 0.69 MB, 3604 KB/s, 0 seconds passed
    ...96%, 0.70 MB, 3644 KB/s, 0 seconds passed
    ...97%, 0.70 MB, 3676 KB/s, 0 seconds passed
    ...99%, 0.71 MB, 3715 KB/s, 0 seconds passed
    ...100%, 0.72 MB, 3752 KB/s, 0 seconds passed
-    Exception in thread Thread-10:
+
    ...1%, 0.01 MB, 42 KB/s, 0 seconds passed
    ...2%, 0.02 MB, 83 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 125 KB/s, 0 seconds passed
    ...4%, 0.03 MB, 166 KB/s, 0 seconds passed
    ...5%, 0.04 MB, 208 KB/s, 0 seconds passed
    ...6%, 0.05 MB, 249 KB/s, 0 seconds passed
    ...7%, 0.05 MB, 290 KB/s, 0 seconds passed
    ...8%, 0.06 MB, 330 KB/s, 0 seconds passed
    ...9%, 0.07 MB, 372 KB/s, 0 seconds passed
    ...10%, 0.08 MB, 412 KB/s, 0 seconds passed
    ...11%, 0.09 MB, 453 KB/s, 0 seconds passed
    ...13%, 0.09 MB, 493 KB/s, 0 seconds passed
    ...14%, 0.10 MB, 533 KB/s, 0 seconds passed
    ...15%, 0.11 MB, 573 KB/s, 0 seconds passed
    ...16%, 0.12 MB, 613 KB/s, 0 seconds passed
    ...17%, 0.12 MB, 653 KB/s, 0 seconds passed
    ...18%, 0.13 MB, 694 KB/s, 0 seconds passed
    ...19%, 0.14 MB, 733 KB/s, 0 seconds passed
    ...20%, 0.15 MB, 773 KB/s, 0 seconds passed
    ...21%, 0.16 MB, 812 KB/s, 0 seconds passed
    ...22%, 0.16 MB, 852 KB/s, 0 seconds passed
 
    ...23%, 0.17 MB, 890 KB/s, 0 seconds passed
    ...25%, 0.18 MB, 931 KB/s, 0 seconds passed
    ...26%, 0.19 MB, 970 KB/s, 0 seconds passed
    ...27%, 0.20 MB, 1011 KB/s, 0 seconds passed
    ...28%, 0.20 MB, 1046 KB/s, 0 seconds passed
    ...29%, 0.21 MB, 1086 KB/s, 0 seconds passed
    ...30%, 0.22 MB, 1125 KB/s, 0 seconds passed
    ...31%, 0.23 MB, 1165 KB/s, 0 seconds passed
    ...32%, 0.23 MB, 1202 KB/s, 0 seconds passed
    ...33%, 0.24 MB, 1241 KB/s, 0 seconds passed
    ...34%, 0.25 MB, 1281 KB/s, 0 seconds passed
    ...35%, 0.26 MB, 1320 KB/s, 0 seconds passed
    ...36%, 0.27 MB, 1356 KB/s, 0 seconds passed
    ...38%, 0.27 MB, 1395 KB/s, 0 seconds passed
    ...39%, 0.28 MB, 1434 KB/s, 0 seconds passed
    ...40%, 0.29 MB, 1473 KB/s, 0 seconds passed
    ...41%, 0.30 MB, 1507 KB/s, 0 seconds passed
    ...42%, 0.30 MB, 1546 KB/s, 0 seconds passed
    ...43%, 0.31 MB, 1585 KB/s, 0 seconds passed
    ...44%, 0.32 MB, 1624 KB/s, 0 seconds passed
    ...45%, 0.33 MB
 , 1656 KB/s, 0 seconds passed
    ...46%, 0.34 MB, 1694 KB/s, 0 seconds passed
    ...47%, 0.34 MB, 1731 KB/s, 0 seconds passed
    ...48%, 0.35 MB, 1770 KB/s, 0 seconds passed
    ...50%, 0.36 MB, 1805 KB/s, 0 seconds passed
    ...51%, 0.37 MB, 1843 KB/s, 0 seconds passed
    ...52%, 0.38 MB, 1880 KB/s, 0 seconds passed
    ...53%, 0.38 MB, 1918 KB/s, 0 seconds passed
    ...54%, 0.39 MB, 1952 KB/s, 0 seconds passed
    ...55%, 0.40 MB, 1990 KB/s, 0 seconds passed
    ...56%, 0.41 MB, 2028 KB/s, 0 seconds passed
    ...57%, 0.41 MB, 2067 KB/s, 0 seconds passed
    ...58%, 0.42 MB, 2102 KB/s, 0 seconds passed
    ...59%, 0.43 MB, 2140 KB/s, 0 seconds passed
    ...60%, 0.44 MB, 2174 KB/s, 0 seconds passed
    ...62%, 0.45 MB, 2212 KB/s, 0 seconds passed
    ...63%, 0.45 MB, 2247 KB/s, 0 seconds passed
    ...64%, 0.46 MB, 2285 KB/s, 0 seconds passed
    ...65%, 0.47 MB, 2322 KB/s, 0 seconds passed
    ...66%, 0.48 MB, 2360 KB/s, 0 seconds passed
    ...67%, 0.48 MB, 2394 KB/s, 0 se
 conds passed
    ...68%, 0.49 MB, 2432 KB/s, 0 seconds passed
    ...69%, 0.50 MB, 2467 KB/s, 0 seconds passed
    ...70%, 0.51 MB, 2505 KB/s, 0 seconds passed
    ...71%, 0.52 MB, 2542 KB/s, 0 seconds passed
    ...72%, 0.52 MB, 2579 KB/s, 0 seconds passed
    ...73%, 0.53 MB, 2612 KB/s, 0 seconds passed
    ...75%, 0.54 MB, 2649 KB/s, 0 seconds passed
    ...76%, 0.55 MB, 2681 KB/s, 0 seconds passed
    ...77%, 0.55 MB, 2719 KB/s, 0 seconds passed
    ...78%, 0.56 MB, 2753 KB/s, 0 seconds passed
    ...79%, 0.57 MB, 2790 KB/s, 0 seconds passed
    ...80%, 0.58 MB, 2827 KB/s, 0 seconds passed
    ...81%, 0.59 MB, 2864 KB/s, 0 seconds passed
    ...82%, 0.59 MB, 2898 KB/s, 0 seconds passed
    ...83%, 0.60 MB, 2935 KB/s, 0 seconds passed
    ...84%, 0.61 MB, 2969 KB/s, 0 seconds passed
    ...85%, 0.62 MB, 3006 KB/s, 0 seconds passed
    ...87%, 0.62 MB, 3042 KB/s, 0 seconds passed
    ...88%, 0.63 MB, 3079 KB/s, 0 seconds passed
    ...89%, 0.64 MB, 3115 KB/s, 0 seconds passed
    
 ...90%, 0.65 MB, 3152 KB/s, 0 seconds passed
    ...91%, 0.66 MB, 3181 KB/s, 0 seconds passed
    ...92%, 0.66 MB, 3218 KB/s, 0 seconds passed
    ...93%, 0.67 MB, 3254 KB/s, 0 seconds passed
    ...94%, 0.68 MB, 3291 KB/s, 0 seconds passed
    ...95%, 0.69 MB, 3327 KB/s, 0 seconds passed
    ...96%, 0.70 MB, 3363 KB/s, 0 seconds passed
    ...97%, 0.70 MB, 3393 KB/s, 0 seconds passed
    ...99%, 0.71 MB, 3430 KB/s, 0 seconds passed
    ...100%, 0.72 MB, 3464 KB/s, 0 seconds passed
+    Exception in thread Thread-9:
     Traceback (most recent call last):
       File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
         self.run()
@@ -497,11 +497,11 @@ Finally, we launch tuning jobs and evaluate the end-to-end performance.
       File "tvm/_ffi/_cython/./packed_func.pxi", line 236, in tvm._ffi._cy3.core.FuncCall3
       File "tvm/_ffi/_cython/./base.pxi", line 160, in tvm._ffi._cy3.core.CALL
     tvm._ffi.base.TVMError: Traceback (most recent call last):
-      [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f90b15790e1]
-      [bt] (3) /workspace/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::GraphRuntimeCodegenModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#5}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)+0x17) [0x7f90b [...]
-      [bt] (2) /workspace/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegenModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#5}::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x58) [0x7f90b141ba08]
-      [bt] (1) /workspace/build/libtvm.so(tvm::runtime::TVMArgValue::operator std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >[abi:cxx11]() const+0x13b) [0x7f90b0c7db6b]
-      [bt] (0) /workspace/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) [0x7f90b0c67d22]
+      [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f897ab62ce1]
+      [bt] (3) /workspace/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::GraphRuntimeCodegenModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#5}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)+0x17) [0x7f897 [...]
+      [bt] (2) /workspace/build/libtvm.so(tvm::relay::backend::GraphRuntimeCodegenModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#5}::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x58) [0x7f897aa05798]
+      [bt] (1) /workspace/build/libtvm.so(tvm::runtime::TVMArgValue::operator std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >[abi:cxx11]() const+0x13b) [0x7f897a24f4eb]
+      [bt] (0) /workspace/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x32) [0x7f897a2396a2]
       File "/workspace/include/tvm/runtime/packed_func.h", line 507
     TVMError: Check failed: type_code_ == kTVMStr (8 vs. 11) : expected str but get Object
 
diff --git a/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
index 05bea38..f0f5288 100644
--- a/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -243,8 +243,8 @@ The compilation steps are:
 
  .. code-block:: none
 
-
    ...12%, 0.01 MB, 56 KB/s, 0 seconds passed
    ...25%, 0.02 MB, 112 KB/s, 0 seconds passed
    ...38%, 0.02 MB, 167 KB/s, 0 seconds passed
    ...51%, 0.03 MB, 222 KB/s, 0 seconds passed
    ...64%, 0.04 MB, 277 KB/s, 0 seconds passed
    ...77%, 0.05 MB, 332 KB/s, 0 seconds passed
    ...90%, 0.05 MB, 387 KB/s, 0 seconds passed
    ...100%, 0.06 MB, 441 KB/s, 0 seconds passed
-    resnet18_v1 inference graph built in 3.54s!
+
    ...12%, 0.01 MB, 44 KB/s, 0 seconds passed
    ...25%, 0.02 MB, 88 KB/s, 0 seconds passed
    ...38%, 0.02 MB, 132 KB/s, 0 seconds passed
    ...51%, 0.03 MB, 175 KB/s, 0 seconds passed
    ...64%, 0.04 MB, 219 KB/s, 0 seconds passed
    ...77%, 0.05 MB, 262 KB/s, 0 seconds passed
    ...90%, 0.05 MB, 305 KB/s, 0 seconds passed
    ...100%, 0.06 MB, 348 KB/s, 0 seconds passed
+    resnet18_v1 inference graph built in 4.79s!
 
 
 
diff --git a/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt
index c9963ef..61e9bc4 100644
--- a/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -315,7 +315,7 @@ The compilation steps are:
 
  .. code-block:: none
 
-    yolov3-tiny inference graph built in 4.76s!
+    yolov3-tiny inference graph built in 6.09s!
 
 
 
diff --git a/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
index eb3f2dc..0721920 100644
--- a/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:49.135** total execution time for **vta_tutorials_frontend** files:
+**00:58.106** total execution time for **vta_tutorials_frontend** files:
 
-- **00:29.567**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
-- **00:19.568**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
+- **00:34.005**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
+- **00:24.101**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
diff --git a/docs/_sources/vta/tutorials/matrix_multiply.rst.txt b/docs/_sources/vta/tutorials/matrix_multiply.rst.txt
index 624e877..c1908be 100644
--- a/docs/_sources/vta/tutorials/matrix_multiply.rst.txt
+++ b/docs/_sources/vta/tutorials/matrix_multiply.rst.txt
@@ -302,45 +302,48 @@ After we construct the schedule, by default the schedule computes
 
  .. code-block:: none
 
-    // attr [A_buf] storage_scope = "global"
-    allocate A_buf[int8 * 256]
-    // attr [B_buf] storage_scope = "global"
-    allocate B_buf[int8 * 65536]
-    // attr [C_buf] storage_scope = "global"
-    allocate C_buf[int32 * 256]
-    for (i1, 0, 16) {
-      for (i3, 0, 16) {
-        A_buf[((i1*16) + i3)] = A[((i1*16) + i3)]
-      }
-    }
-    for (i0, 0, 16) {
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [A_buf] storage_scope = "global"
+      allocate A_buf[int8 * 256]
+      // attr [B_buf] storage_scope = "global"
+      allocate B_buf[int8 * 65536]
+      // attr [C_buf] storage_scope = "global"
+      allocate C_buf[int32 * 256]
       for (i1, 0, 16) {
-        for (i2, 0, 16) {
-          for (i3, 0, 16) {
-            B_buf[((((i0*4096) + (i1*256)) + (i2*16)) + i3)] = B[((((i0*4096) + (i1*256)) + (i2*16)) + i3)]
+        for (i3, 0, 16) {
+          A_buf[((i1*16) + i3)] = A[((i1*16) + i3)]
+        }
+      }
+      for (i0, 0, 16) {
+        for (i1, 0, 16) {
+          for (i2, 0, 16) {
+            for (i3, 0, 16) {
+              B_buf[((((i0*4096) + (i1*256)) + (i2*16)) + i3)] = B[((((i0*4096) + (i1*256)) + (i2*16)) + i3)]
+            }
           }
         }
       }
-    }
-    for (co, 0, 16) {
-      for (ci, 0, 16) {
-        C_buf[((co*16) + ci)] = 0
-        for (ko, 0, 16) {
-          for (ki, 0, 16) {
-            C_buf[((co*16) + ci)] = (C_buf[((co*16) + ci)] + (int32(A_buf[((ko*16) + ki)])*int32(B_buf[((((co*4096) + (ko*256)) + (ci*16)) + ki)])))
+      for (co, 0, 16) {
+        for (ci, 0, 16) {
+          C_buf[((co*16) + ci)] = 0
+          for (ko, 0, 16) {
+            for (ki, 0, 16) {
+              C_buf[((co*16) + ci)] = (C_buf[((co*16) + ci)] + (int32(A_buf[((ko*16) + ki)])*int32(B_buf[((((co*4096) + (ko*256)) + (ci*16)) + ki)])))
+            }
           }
         }
       }
-    }
-    for (i1, 0, 16) {
-      for (i3, 0, 16) {
-        C[((i1*16) + i3)] = int8(C_buf[((i1*16) + i3)])
+      for (i1, 0, 16) {
+        for (i3, 0, 16) {
+          C[((i1*16) + i3)] = int8(C_buf[((i1*16) + i3)])
+        }
       }
     }
 
 
 
 
+
 Although this schedule makes sense, it won't compile to VTA.
 In order to obtain correct code generation, we need to apply scheduling
 primitives and code annotation that will transform the schedule into
@@ -439,40 +442,43 @@ moving the copy operations into the matrix multiplication loop.
 
  .. code-block:: none
 
-    // attr [C_buf] storage_scope = "local.acc_buffer"
-    allocate C_buf[int32 * 256]
-    // attr [A_buf] storage_scope = "local.inp_buffer"
-    allocate A_buf[int8 * 16]
-    // attr [B_buf] storage_scope = "local.wgt_buffer"
-    allocate B_buf[int8 * 16]
-    for (co, 0, 16) {
-      for (ci, 0, 16) {
-        C_buf[((co*16) + ci)] = 0
-        for (ko, 0, 16) {
-          // attr [iter_var(i0, )] pragma_dma_copy = 1
-          for (i3, 0, 16) {
-            A_buf[i3] = A[((ko*16) + i3)]
-          }
-          // attr [iter_var(i0, )] pragma_dma_copy = 1
-          for (i3, 0, 16) {
-            B_buf[i3] = B[((((co*4096) + (ko*256)) + (ci*16)) + i3)]
-          }
-          for (ki, 0, 16) {
-            C_buf[((co*16) + ci)] = (C_buf[((co*16) + ci)] + (int32(A_buf[ki])*int32(B_buf[ki])))
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [C_buf] storage_scope = "local.acc_buffer"
+      allocate C_buf[int32 * 256]
+      // attr [A_buf] storage_scope = "local.inp_buffer"
+      allocate A_buf[int8 * 16]
+      // attr [B_buf] storage_scope = "local.wgt_buffer"
+      allocate B_buf[int8 * 16]
+      for (co, 0, 16) {
+        for (ci, 0, 16) {
+          C_buf[((co*16) + ci)] = 0
+          for (ko, 0, 16) {
+            // attr [iter_var(i0, )] pragma_dma_copy = 1
+            for (i3, 0, 16) {
+              A_buf[i3] = A[((ko*16) + i3)]
+            }
+            // attr [iter_var(i0, )] pragma_dma_copy = 1
+            for (i3, 0, 16) {
+              B_buf[i3] = B[((((co*4096) + (ko*256)) + (ci*16)) + i3)]
+            }
+            for (ki, 0, 16) {
+              C_buf[((co*16) + ci)] = (C_buf[((co*16) + ci)] + (int32(A_buf[ki])*int32(B_buf[ki])))
+            }
           }
         }
       }
-    }
-    // attr [iter_var(i0, )] pragma_dma_copy = 1
-    for (i1, 0, 16) {
-      for (i3, 0, 16) {
-        C[((i1*16) + i3)] = int8(C_buf[((i1*16) + i3)])
+      // attr [iter_var(i0, )] pragma_dma_copy = 1
+      for (i1, 0, 16) {
+        for (i3, 0, 16) {
+          C[((i1*16) + i3)] = int8(C_buf[((i1*16) + i3)])
+        }
       }
     }
 
 
 
 
+
 Tensorization
 ~~~~~~~~~~~~~
 The last step of the schedule transformation consists in applying
@@ -519,35 +525,38 @@ by the VTA runtime JIT compiler.
 
  .. code-block:: none
 
-    // attr [C_buf] storage_scope = "local.acc_buffer"
-    // attr [A_buf] storage_scope = "local.inp_buffer"
-    // attr [B_buf] storage_scope = "local.wgt_buffer"
-    // attr [iter_var(vta, , vta)] coproc_scope = 2
-    // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
-    VTAUopLoopBegin(16, 1, 0, 0)
-    VTAUopPush(0, 1, 0, 0, 0, 0, 0, 0)
-    VTAUopLoopEnd()
-    vta.coproc_dep_push(2, 1)
-    for (ko, 0, 16) {
-      // attr [iter_var(vta, , vta)] coproc_scope = 1
-      vta.coproc_dep_pop(2, 1)
-      VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), A, ko, 1, 1, 1, 0, 0, 0, 0, 0, 2)
-      VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), B, ko, 1, 16, 16, 0, 0, 0, 0, 0, 1)
-      vta.coproc_dep_push(1, 2)
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [C_buf] storage_scope = "local.acc_buffer"
+      // attr [A_buf] storage_scope = "local.inp_buffer"
+      // attr [B_buf] storage_scope = "local.wgt_buffer"
       // attr [iter_var(vta, , vta)] coproc_scope = 2
-      vta.coproc_dep_pop(1, 2)
       // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
-      VTAUopLoopBegin(16, 1, 0, 1)
-      VTAUopPush(0, 0, 0, 0, 0, 0, 0, 0)
+      VTAUopLoopBegin(16, 1, 0, 0)
+      VTAUopPush(0, 1, 0, 0, 0, 0, 0, 0)
       VTAUopLoopEnd()
       vta.coproc_dep_push(2, 1)
+      for (ko, 0, 16) {
+        // attr [iter_var(vta, , vta)] coproc_scope = 1
+        vta.coproc_dep_pop(2, 1)
+        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), A, ko, 1, 1, 1, 0, 0, 0, 0, 0, 2)
+        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), B, ko, 1, 16, 16, 0, 0, 0, 0, 0, 1)
+        vta.coproc_dep_push(1, 2)
+        // attr [iter_var(vta, , vta)] coproc_scope = 2
+        vta.coproc_dep_pop(1, 2)
+        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
+        VTAUopLoopBegin(16, 1, 0, 1)
+        VTAUopPush(0, 0, 0, 0, 0, 0, 0, 0)
+        VTAUopLoopEnd()
+        vta.coproc_dep_push(2, 1)
+      }
+      vta.coproc_dep_push(2, 3)
+      vta.coproc_dep_pop(2, 1)
+      // attr [iter_var(vta, , vta)] coproc_scope = 3
+      vta.coproc_dep_pop(2, 3)
+      VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), 0, 4, C, 0, 16, 1, 16)
+      vta.coproc_sync()
     }
-    vta.coproc_dep_push(2, 3)
-    vta.coproc_dep_pop(2, 1)
-    // attr [iter_var(vta, , vta)] coproc_scope = 3
-    vta.coproc_dep_pop(2, 3)
-    VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), 0, 4, C, 0, 16, 1, 16)
-    vta.coproc_sync()
+
 
 
 
diff --git a/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt b/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
index ec65809..8588f2d 100644
--- a/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
@@ -255,44 +255,46 @@ Those include:
 
  .. code-block:: none
 
-    // attr [data_buf] storage_scope = "global"
-    allocate data_buf[int8 * 65536]
-    // attr [kernel_buf] storage_scope = "global"
-    allocate kernel_buf[int8 * 589824]
-    // attr [res_conv] storage_scope = "global"
-    allocate res_conv[int32 * 50176]
-    for (i1, 0, 16) {
-      for (i2, 0, 16) {
-        for (i3, 0, 16) {
-          for (i5, 0, 16) {
-            data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = tvm_if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), data[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)], (int8)0)
+    PrimFunc([data, kernel, res]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [data_buf] storage_scope = "global"
+      allocate data_buf[int8 * 65536]
+      // attr [kernel_buf] storage_scope = "global"
+      allocate kernel_buf[int8 * 589824]
+      // attr [res_conv] storage_scope = "global"
+      allocate res_conv[int32 * 50176]
+      for (i1, 0, 16) {
+        for (i2, 0, 16) {
+          for (i3, 0, 16) {
+            for (i5, 0, 16) {
+              data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = tvm_if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), data[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)], (int8)0)
+            }
           }
         }
       }
-    }
-    for (i0, 0, 16) {
-      for (i1, 0, 16) {
-        for (i2, 0, 3) {
-          for (i3, 0, 3) {
-            for (i4, 0, 16) {
-              for (i5, 0, 16) {
-                kernel_buf[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)] = kernel[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)]
+      for (i0, 0, 16) {
+        for (i1, 0, 16) {
+          for (i2, 0, 3) {
+            for (i3, 0, 3) {
+              for (i4, 0, 16) {
+                for (i5, 0, 16) {
+                  kernel_buf[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)] = kernel[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)]
+                }
               }
             }
           }
         }
       }
-    }
-    for (co, 0, 16) {
-      for (i, 0, 14) {
-        for (j, 0, 14) {
-          for (ci, 0, 16) {
-            res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] = 0
-            for (ic, 0, 16) {
-              for (dy, 0, 3) {
-                for (dx, 0, 3) {
-                  for (ic_tns, 0, 16) {
-                    res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] = (res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] + (int32(data_buf[((((((ic*4096) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*int32(kernel_buf[((((((co*36864) + (ic*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)])))
+      for (co, 0, 16) {
+        for (i, 0, 14) {
+          for (j, 0, 14) {
+            for (ci, 0, 16) {
+              res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] = 0
+              for (ic, 0, 16) {
+                for (dy, 0, 3) {
+                  for (dx, 0, 3) {
+                    for (ic_tns, 0, 16) {
+                      res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] = (res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] + (int32(data_buf[((((((ic*4096) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*int32(kernel_buf[((((((co*36864) + (ic*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)])))
+                    }
                   }
                 }
               }
@@ -300,39 +302,39 @@ Those include:
           }
         }
       }
-    }
-    for (i1, 0, 16) {
-      for (i2, 0, 14) {
-        for (i3, 0, 14) {
-          for (i5, 0, 16) {
-            res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = shift_right(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)], 8)
+      for (i1, 0, 16) {
+        for (i2, 0, 14) {
+          for (i3, 0, 14) {
+            for (i5, 0, 16) {
+              res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = shift_right(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)], 8)
+            }
           }
         }
       }
-    }
-    for (i1, 0, 16) {
-      for (i2, 0, 14) {
-        for (i3, 0, 14) {
-          for (i5, 0, 16) {
-            res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = max(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)], 0)
+      for (i1, 0, 16) {
+        for (i2, 0, 14) {
+          for (i3, 0, 14) {
+            for (i5, 0, 16) {
+              res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = max(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)], 0)
+            }
           }
         }
       }
-    }
-    for (i1, 0, 16) {
-      for (i2, 0, 14) {
-        for (i3, 0, 14) {
-          for (i5, 0, 16) {
-            res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = min(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)], 127)
+      for (i1, 0, 16) {
+        for (i2, 0, 14) {
+          for (i3, 0, 14) {
+            for (i5, 0, 16) {
+              res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = min(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)], 127)
+            }
           }
         }
       }
-    }
-    for (i1, 0, 16) {
-      for (i2, 0, 14) {
-        for (i3, 0, 14) {
-          for (i5, 0, 16) {
-            res[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = int8(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)])
+      for (i1, 0, 16) {
+        for (i2, 0, 14) {
+          for (i3, 0, 14) {
+            for (i5, 0, 16) {
+              res[((((i1*3136) + (i2*224)) + (i3*16)) + i5)] = int8(res_conv[((((i1*3136) + (i2*224)) + (i3*16)) + i5)])
+            }
           }
         }
       }
@@ -341,6 +343,7 @@ Those include:
 
 
 
+
 Blocking the Computation
 ~~~~~~~~~~~~~~~~~~~~~~~~
 The 2D convolution is by default too large for activations or kernel weights
@@ -442,55 +445,57 @@ below.
 
  .. code-block:: none
 
-    // attr [data_buf] storage_scope = "global"
-    allocate data_buf[int8 * 65536]
-    // attr [kernel_buf] storage_scope = "global"
-    allocate kernel_buf[int8 * 589824]
-    // attr [res_conv] storage_scope = "global"
-    allocate res_conv[int32 * 25088]
-    for (i1, 0, 16) {
-      for (i2, 0, 16) {
-        for (i3, 0, 16) {
-          for (i5, 0, 16) {
-            data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = tvm_if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), data[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)], (int8)0)
+    PrimFunc([data, kernel, res]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [data_buf] storage_scope = "global"
+      allocate data_buf[int8 * 65536]
+      // attr [kernel_buf] storage_scope = "global"
+      allocate kernel_buf[int8 * 589824]
+      // attr [res_conv] storage_scope = "global"
+      allocate res_conv[int32 * 25088]
+      for (i1, 0, 16) {
+        for (i2, 0, 16) {
+          for (i3, 0, 16) {
+            for (i5, 0, 16) {
+              data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = tvm_if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), data[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)], (int8)0)
+            }
           }
         }
       }
-    }
-    for (i0, 0, 16) {
-      for (i1, 0, 16) {
-        for (i2, 0, 3) {
-          for (i3, 0, 3) {
-            for (i4, 0, 16) {
-              for (i5, 0, 16) {
-                kernel_buf[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)] = kernel[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)]
+      for (i0, 0, 16) {
+        for (i1, 0, 16) {
+          for (i2, 0, 3) {
+            for (i3, 0, 3) {
+              for (i4, 0, 16) {
+                for (i5, 0, 16) {
+                  kernel_buf[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)] = kernel[((((((i0*36864) + (i1*2304)) + (i2*768)) + (i3*256)) + (i4*16)) + i5)]
+                }
               }
             }
           }
         }
       }
-    }
-    for (i2.outer, 0, 2) {
-      for (co.init, 0, 8) {
-        for (i.init, 0, 7) {
-          for (j.init, 0, 14) {
-            for (ci.init, 0, 16) {
-              res_conv[((((co.init*1568) + (i.init*224)) + (j.init*16)) + ci.init)] = 0
-              res_conv[(((((co.init*1568) + (i.init*224)) + (j.init*16)) + ci.init) + 12544)] = 0
+      for (i2.outer, 0, 2) {
+        for (co.init, 0, 8) {
+          for (i.init, 0, 7) {
+            for (j.init, 0, 14) {
+              for (ci.init, 0, 16) {
+                res_conv[((((co.init*1568) + (i.init*224)) + (j.init*16)) + ci.init)] = 0
+                res_conv[(((((co.init*1568) + (i.init*224)) + (j.init*16)) + ci.init) + 12544)] = 0
+              }
             }
           }
         }
-      }
-      for (ic.outer, 0, 16) {
-        for (co, 0, 8) {
-          for (i, 0, 7) {
-            for (dy, 0, 3) {
-              for (dx, 0, 3) {
-                for (j, 0, 14) {
-                  for (ci, 0, 16) {
-                    for (ic_tns, 0, 16) {
-                      res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)] = (res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)] + (int32(data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*int32(kernel_buf[((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)])))
-                      res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)] = (res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)] + (int32(data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*int32(kernel_buf[(((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns) + 294912)])))
+        for (ic.outer, 0, 16) {
+          for (co, 0, 8) {
+            for (i, 0, 7) {
+              for (dy, 0, 3) {
+                for (dx, 0, 3) {
+                  for (j, 0, 14) {
+                    for (ci, 0, 16) {
+                      for (ic_tns, 0, 16) {
+                        res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)] = (res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)] + (int32(data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*int32(kernel_buf[((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)])))
+                        res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)] = (res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)] + (int32(data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*int32(kernel_buf[(((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns) + 294912)])))
+                      }
                     }
                   }
                 }
@@ -498,43 +503,43 @@ below.
             }
           }
         }
-      }
-      for (i1, 0, 8) {
-        for (i2, 0, 7) {
-          for (i3, 0, 14) {
-            for (i5, 0, 16) {
-              res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)] = shift_right(res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)], 8)
-              res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)] = shift_right(res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)], 8)
+        for (i1, 0, 8) {
+          for (i2, 0, 7) {
+            for (i3, 0, 14) {
+              for (i5, 0, 16) {
+                res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)] = shift_right(res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)], 8)
+                res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)] = shift_right(res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)], 8)
+              }
             }
           }
         }
-      }
-      for (i1, 0, 8) {
-        for (i2, 0, 7) {
-          for (i3, 0, 14) {
-            for (i5, 0, 16) {
-              res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)] = max(res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)], 0)
-              res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)] = max(res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)], 0)
+        for (i1, 0, 8) {
+          for (i2, 0, 7) {
+            for (i3, 0, 14) {
+              for (i5, 0, 16) {
+                res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)] = max(res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)], 0)
+                res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)] = max(res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)], 0)
+              }
             }
           }
         }
-      }
-      for (i1, 0, 8) {
-        for (i2, 0, 7) {
-          for (i3, 0, 14) {
-            for (i5, 0, 16) {
-              res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)] = min(res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)], 127)
-              res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)] = min(res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)], 127)
+        for (i1, 0, 8) {
+          for (i2, 0, 7) {
+            for (i3, 0, 14) {
+              for (i5, 0, 16) {
+                res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)] = min(res_conv[((((i1*1568) + (i2*224)) + (i3*16)) + i5)], 127)
+                res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)] = min(res_conv[(((((i1*1568) + (i2*224)) + (i3*16)) + i5) + 12544)], 127)
+              }
             }
           }
         }
-      }
-      for (i1.inner, 0, 8) {
-        for (i2.inner, 0, 7) {
-          for (i3.inner, 0, 14) {
-            for (i5, 0, 16) {
-              res[(((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5)] = int8(res_conv[((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5)])
-              res[((((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5) + 25088)] = int8(res_conv[(((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5) + 12544)])
+        for (i1.inner, 0, 8) {
+          for (i2.inner, 0, 7) {
+            for (i3.inner, 0, 14) {
+              for (i5, 0, 16) {
+                res[(((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5)] = int8(res_conv[((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5)])
+                res[((((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5) + 25088)] = int8(res_conv[(((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5) + 12544)])
+              }
             }
           }
         }
@@ -544,6 +549,7 @@ below.
 
 
 
+
 Lowering Copies to DMA Transfers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Next we set the buffer scopes to the corresponding on-chip VTA SRAM buffers.
@@ -616,88 +622,91 @@ and mapping the shift, and clipping computation to the vector ALU.
 
  .. code-block:: none
 
-    // attr [res_conv] storage_scope = "local.acc_buffer"
-    // attr [data_buf] storage_scope = "local.inp_buffer"
-    // attr [kernel_buf] storage_scope = "local.wgt_buffer"
-    vta.coproc_dep_push(3, 2)
-    vta.coproc_dep_push(3, 2)
-    for (i2.outer, 0, 2) {
-      for (cthread.s, 0, 2) {
-        // attr [iter_var(vta, , vta)] coproc_scope = 2
-        vta.coproc_dep_pop(3, 2)
-        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
-        VTAUopLoopBegin(8, 98, 0, 0)
-        VTAUopLoopBegin(7, 14, 0, 0)
-        for (j.init, 0, 14) {
-          VTAUopPush(0, 1, ((cthread.s*784) + j.init), 0, 0, 0, 0, 0)
-        }
-        VTAUopLoopEnd()
-        VTAUopLoopEnd()
-        vta.coproc_dep_push(2, 1)
-      }
-      for (ic.outer, 0, 16) {
-        // attr [iter_var(vta, , vta)] coproc_scope = 1
-        vta.coproc_dep_pop(2, 1)
-        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), data, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 0, 2)
-        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), kernel, (ic.outer*9), 9, 8, 144, 0, 0, 0, 0, 0, 1)
-        vta.coproc_dep_push(1, 2)
-        // attr [iter_var(vta, , vta)] coproc_scope = 1
-        vta.coproc_dep_pop(2, 1)
-        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), data, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 144, 2)
-        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), kernel, ((ic.outer*9) + 1152), 9, 8, 144, 0, 0, 0, 0, 72, 1)
-        vta.coproc_dep_push(1, 2)
+    PrimFunc([data, kernel, res]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [res_conv] storage_scope = "local.acc_buffer"
+      // attr [data_buf] storage_scope = "local.inp_buffer"
+      // attr [kernel_buf] storage_scope = "local.wgt_buffer"
+      vta.coproc_dep_push(3, 2)
+      vta.coproc_dep_push(3, 2)
+      for (i2.outer, 0, 2) {
         for (cthread.s, 0, 2) {
           // attr [iter_var(vta, , vta)] coproc_scope = 2
-          vta.coproc_dep_pop(1, 2)
+          vta.coproc_dep_pop(3, 2)
           // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
-          VTAUopLoopBegin(8, 98, 0, 9)
-          VTAUopLoopBegin(7, 14, 16, 0)
-          for (dy, 0, 3) {
-            for (dx, 0, 3) {
-              for (j, 0, 14) {
-                VTAUopPush(0, 0, ((cthread.s*784) + j), ((((cthread.s*144) + (dy*16)) + j) + dx), (((cthread.s*72) + (dy*3)) + dx), 0, 0, 0)
+          VTAUopLoopBegin(8, 98, 0, 0)
+          VTAUopLoopBegin(7, 14, 0, 0)
+          for (j.init, 0, 14) {
+            VTAUopPush(0, 1, ((cthread.s*784) + j.init), 0, 0, 0, 0, 0)
+          }
+          VTAUopLoopEnd()
+          VTAUopLoopEnd()
+          vta.coproc_dep_push(2, 1)
+        }
+        for (ic.outer, 0, 16) {
+          // attr [iter_var(vta, , vta)] coproc_scope = 1
+          vta.coproc_dep_pop(2, 1)
+          VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), data, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 0, 2)
+          VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), kernel, (ic.outer*9), 9, 8, 144, 0, 0, 0, 0, 0, 1)
+          vta.coproc_dep_push(1, 2)
+          // attr [iter_var(vta, , vta)] coproc_scope = 1
+          vta.coproc_dep_pop(2, 1)
+          VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), data, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 144, 2)
+          VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), kernel, ((ic.outer*9) + 1152), 9, 8, 144, 0, 0, 0, 0, 72, 1)
+          vta.coproc_dep_push(1, 2)
+          for (cthread.s, 0, 2) {
+            // attr [iter_var(vta, , vta)] coproc_scope = 2
+            vta.coproc_dep_pop(1, 2)
+            // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
+            VTAUopLoopBegin(8, 98, 0, 9)
+            VTAUopLoopBegin(7, 14, 16, 0)
+            for (dy, 0, 3) {
+              for (dx, 0, 3) {
+                for (j, 0, 14) {
+                  VTAUopPush(0, 0, ((cthread.s*784) + j), ((((cthread.s*144) + (dy*16)) + j) + dx), (((cthread.s*72) + (dy*3)) + dx), 0, 0, 0)
+                }
               }
             }
+            VTAUopLoopEnd()
+            VTAUopLoopEnd()
+            vta.coproc_dep_push(2, 1)
           }
+        }
+        vta.coproc_dep_pop(2, 1)
+        vta.coproc_dep_pop(2, 1)
+        for (cthread.s, 0, 2) {
+          // attr [iter_var(vta, , vta)] coproc_scope = 2
+          // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
+          VTAUopLoopBegin(784, 1, 1, 0)
+          VTAUopPush(1, 0, (cthread.s*784), (cthread.s*784), 0, 3, 1, 8)
           VTAUopLoopEnd()
+          // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
+          VTAUopLoopBegin(784, 1, 1, 0)
+          VTAUopPush(1, 0, (cthread.s*784), (cthread.s*784), 0, 1, 1, 0)
           VTAUopLoopEnd()
-          vta.coproc_dep_push(2, 1)
+          // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
+          VTAUopLoopBegin(784, 1, 1, 0)
+          VTAUopPush(1, 0, (cthread.s*784), (cthread.s*784), 0, 0, 1, 127)
+          VTAUopLoopEnd()
+          vta.coproc_dep_push(2, 3)
         }
-      }
-      vta.coproc_dep_pop(2, 1)
-      vta.coproc_dep_pop(2, 1)
-      for (cthread.s, 0, 2) {
-        // attr [iter_var(vta, , vta)] coproc_scope = 2
-        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
-        VTAUopLoopBegin(784, 1, 1, 0)
-        VTAUopPush(1, 0, (cthread.s*784), (cthread.s*784), 0, 3, 1, 8)
-        VTAUopLoopEnd()
-        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
-        VTAUopLoopBegin(784, 1, 1, 0)
-        VTAUopPush(1, 0, (cthread.s*784), (cthread.s*784), 0, 1, 1, 0)
-        VTAUopLoopEnd()
-        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
-        VTAUopLoopBegin(784, 1, 1, 0)
-        VTAUopPush(1, 0, (cthread.s*784), (cthread.s*784), 0, 0, 1, 127)
-        VTAUopLoopEnd()
-        vta.coproc_dep_push(2, 3)
-      }
-      for (cthread.s, 0, 2) {
-        // attr [iter_var(vta, , vta)] coproc_scope = 3
-        vta.coproc_dep_pop(2, 3)
-        for (i1.inner, 0, 8) {
-          for (i2.inner, 0, 7) {
-            for (i3.inner, 0, 14) {
-              VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), ((((cthread.s*784) + (i1.inner*98)) + (i2.inner*14)) + i3.inner), 4, res, (((((cthread.s*1568) + (i1.inner*196)) + (i2.outer*98)) + (i2.inner*14)) + i3.inner), 1, 1, 1)
+        for (cthread.s, 0, 2) {
+          // attr [iter_var(vta, , vta)] coproc_scope = 3
+          vta.coproc_dep_pop(2, 3)
+          for (i1.inner, 0, 8) {
+            for (i2.inner, 0, 7) {
+              for (i3.inner, 0, 14) {
+                VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), ((((cthread.s*784) + (i1.inner*98)) + (i2.inner*14)) + i3.inner), 4, res, (((((cthread.s*1568) + (i1.inner*196)) + (i2.outer*98)) + (i2.inner*14)) + i3.inner), 1, 1, 1)
+              }
             }
           }
+          vta.coproc_dep_push(3, 2)
         }
-        vta.coproc_dep_push(3, 2)
       }
+      vta.coproc_dep_pop(3, 2)
+      vta.coproc_dep_pop(3, 2)
+      vta.coproc_sync()
     }
-    vta.coproc_dep_pop(3, 2)
-    vta.coproc_dep_pop(3, 2)
-    vta.coproc_sync()
+
 
 
 
diff --git a/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt b/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt
index dfd2f74..7a43616 100644
--- a/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt
@@ -200,60 +200,63 @@ Those include:
 
  .. code-block:: none
 
-    // attr [data_buf] storage_scope = "global"
-    allocate data_buf[int8 * 1024]
-    // attr [weight_buf] storage_scope = "global"
-    allocate weight_buf[int8 * 1048576]
-    // attr [res_gem] storage_scope = "global"
-    allocate res_gem[int32 * 1024]
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        data_buf[((i1*16) + i3)] = data[((i1*16) + i3)]
-      }
-    }
-    for (i0, 0, 64) {
+    PrimFunc([data, weight, res]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [data_buf] storage_scope = "global"
+      allocate data_buf[int8 * 1024]
+      // attr [weight_buf] storage_scope = "global"
+      allocate weight_buf[int8 * 1048576]
+      // attr [res_gem] storage_scope = "global"
+      allocate res_gem[int32 * 1024]
       for (i1, 0, 64) {
-        for (i2, 0, 16) {
-          for (i3, 0, 16) {
-            weight_buf[((((i0*16384) + (i1*256)) + (i2*16)) + i3)] = weight[((((i0*16384) + (i1*256)) + (i2*16)) + i3)]
+        for (i3, 0, 16) {
+          data_buf[((i1*16) + i3)] = data[((i1*16) + i3)]
+        }
+      }
+      for (i0, 0, 64) {
+        for (i1, 0, 64) {
+          for (i2, 0, 16) {
+            for (i3, 0, 16) {
+              weight_buf[((((i0*16384) + (i1*256)) + (i2*16)) + i3)] = weight[((((i0*16384) + (i1*256)) + (i2*16)) + i3)]
+            }
           }
         }
       }
-    }
-    for (co, 0, 64) {
-      for (ci, 0, 16) {
-        res_gem[((co*16) + ci)] = 0
-        for (ic, 0, 64) {
-          for (ic_tns, 0, 16) {
-            res_gem[((co*16) + ci)] = (res_gem[((co*16) + ci)] + (int32(data_buf[((ic*16) + ic_tns)])*int32(weight_buf[((((co*16384) + (ic*256)) + (ci*16)) + ic_tns)])))
+      for (co, 0, 64) {
+        for (ci, 0, 16) {
+          res_gem[((co*16) + ci)] = 0
+          for (ic, 0, 64) {
+            for (ic_tns, 0, 16) {
+              res_gem[((co*16) + ci)] = (res_gem[((co*16) + ci)] + (int32(data_buf[((ic*16) + ic_tns)])*int32(weight_buf[((((co*16384) + (ic*256)) + (ci*16)) + ic_tns)])))
+            }
           }
         }
       }
-    }
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        res_gem[((i1*16) + i3)] = shift_right(res_gem[((i1*16) + i3)], 8)
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          res_gem[((i1*16) + i3)] = shift_right(res_gem[((i1*16) + i3)], 8)
+        }
       }
-    }
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        res_gem[((i1*16) + i3)] = max(res_gem[((i1*16) + i3)], 0)
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          res_gem[((i1*16) + i3)] = max(res_gem[((i1*16) + i3)], 0)
+        }
       }
-    }
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        res_gem[((i1*16) + i3)] = min(res_gem[((i1*16) + i3)], 127)
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          res_gem[((i1*16) + i3)] = min(res_gem[((i1*16) + i3)], 127)
+        }
       }
-    }
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        res[((i1*16) + i3)] = int8(res_gem[((i1*16) + i3)])
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          res[((i1*16) + i3)] = int8(res_gem[((i1*16) + i3)])
+        }
       }
     }
 
 
 
 
+
 Blocking the Computation
 ~~~~~~~~~~~~~~~~~~~~~~~~
 The matrix multiplication is by default too large for activations or weights
@@ -353,61 +356,63 @@ below:
 
  .. code-block:: none
 
-    // attr [data_buf] storage_scope = "global"
-    allocate data_buf[int8 * 1024]
-    // attr [weight_buf] storage_scope = "global"
-    allocate weight_buf[int8 * 1048576]
-    // attr [res_gem] storage_scope = "global"
-    allocate res_gem[int32 * 256]
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        data_buf[((i1*16) + i3)] = data[((i1*16) + i3)]
-      }
-    }
-    for (i0, 0, 64) {
+    PrimFunc([data, weight, res]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [data_buf] storage_scope = "global"
+      allocate data_buf[int8 * 1024]
+      // attr [weight_buf] storage_scope = "global"
+      allocate weight_buf[int8 * 1048576]
+      // attr [res_gem] storage_scope = "global"
+      allocate res_gem[int32 * 256]
       for (i1, 0, 64) {
-        for (i2, 0, 16) {
-          for (i3, 0, 16) {
-            weight_buf[((((i0*16384) + (i1*256)) + (i2*16)) + i3)] = weight[((((i0*16384) + (i1*256)) + (i2*16)) + i3)]
-          }
+        for (i3, 0, 16) {
+          data_buf[((i1*16) + i3)] = data[((i1*16) + i3)]
         }
       }
-    }
-    for (i1.outer, 0, 4) {
-      for (co.init, 0, 16) {
-        for (ci.init, 0, 16) {
-          res_gem[((co.init*16) + ci.init)] = 0
+      for (i0, 0, 64) {
+        for (i1, 0, 64) {
+          for (i2, 0, 16) {
+            for (i3, 0, 16) {
+              weight_buf[((((i0*16384) + (i1*256)) + (i2*16)) + i3)] = weight[((((i0*16384) + (i1*256)) + (i2*16)) + i3)]
+            }
+          }
         }
       }
-      for (ic.outer, 0, 4) {
-        for (co, 0, 16) {
-          for (ic.inner, 0, 16) {
-            for (ci, 0, 16) {
-              for (ic_tns, 0, 16) {
-                res_gem[((co*16) + ci)] = (res_gem[((co*16) + ci)] + (int32(data_buf[(((ic.outer*256) + (ic.inner*16)) + ic_tns)])*int32(weight_buf[((((((i1.outer*262144) + (co*16384)) + (ic.outer*4096)) + (ic.inner*256)) + (ci*16)) + ic_tns)])))
+      for (i1.outer, 0, 4) {
+        for (co.init, 0, 16) {
+          for (ci.init, 0, 16) {
+            res_gem[((co.init*16) + ci.init)] = 0
+          }
+        }
+        for (ic.outer, 0, 4) {
+          for (co, 0, 16) {
+            for (ic.inner, 0, 16) {
+              for (ci, 0, 16) {
+                for (ic_tns, 0, 16) {
+                  res_gem[((co*16) + ci)] = (res_gem[((co*16) + ci)] + (int32(data_buf[(((ic.outer*256) + (ic.inner*16)) + ic_tns)])*int32(weight_buf[((((((i1.outer*262144) + (co*16384)) + (ic.outer*4096)) + (ic.inner*256)) + (ci*16)) + ic_tns)])))
+                }
               }
             }
           }
         }
-      }
-      for (i1, 0, 16) {
-        for (i3, 0, 16) {
-          res_gem[((i1*16) + i3)] = shift_right(res_gem[((i1*16) + i3)], 8)
+        for (i1, 0, 16) {
+          for (i3, 0, 16) {
+            res_gem[((i1*16) + i3)] = shift_right(res_gem[((i1*16) + i3)], 8)
+          }
         }
-      }
-      for (i1, 0, 16) {
-        for (i3, 0, 16) {
-          res_gem[((i1*16) + i3)] = max(res_gem[((i1*16) + i3)], 0)
+        for (i1, 0, 16) {
+          for (i3, 0, 16) {
+            res_gem[((i1*16) + i3)] = max(res_gem[((i1*16) + i3)], 0)
+          }
         }
-      }
-      for (i1, 0, 16) {
-        for (i3, 0, 16) {
-          res_gem[((i1*16) + i3)] = min(res_gem[((i1*16) + i3)], 127)
+        for (i1, 0, 16) {
+          for (i3, 0, 16) {
+            res_gem[((i1*16) + i3)] = min(res_gem[((i1*16) + i3)], 127)
+          }
         }
-      }
-      for (i1.inner, 0, 16) {
-        for (i3, 0, 16) {
-          res[(((i1.outer*256) + (i1.inner*16)) + i3)] = int8(res_gem[((i1.inner*16) + i3)])
+        for (i1.inner, 0, 16) {
+          for (i3, 0, 16) {
+            res[(((i1.outer*256) + (i1.inner*16)) + i3)] = int8(res_gem[((i1.inner*16) + i3)])
+          }
         }
       }
     }
@@ -415,6 +420,7 @@ below:
 
 
 
+
 Lowering Copies to DMA Transfers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Next we set the buffer scopes to the corresponding on-chip VTA SRAM buffers.
@@ -487,58 +493,61 @@ and mapping the shift, and clipping computation to the vector ALU.
 
  .. code-block:: none
 
-    // attr [res_gem] storage_scope = "local.acc_buffer"
-    // attr [data_buf] storage_scope = "local.inp_buffer"
-    // attr [weight_buf] storage_scope = "local.wgt_buffer"
-    vta.coproc_dep_push(3, 2)
-    for (i1.outer, 0, 4) {
-      // attr [iter_var(vta, , vta)] coproc_scope = 2
-      vta.coproc_dep_pop(3, 2)
-      // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
-      VTAUopLoopBegin(16, 1, 0, 0)
-      VTAUopPush(0, 1, 0, 0, 0, 0, 0, 0)
-      VTAUopLoopEnd()
-      vta.coproc_dep_push(2, 1)
-      for (ic.outer, 0, 4) {
-        // attr [iter_var(vta, , vta)] coproc_scope = 1
-        vta.coproc_dep_pop(2, 1)
-        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), data, (ic.outer*16), 16, 1, 16, 0, 0, 0, 0, 0, 2)
-        VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), weight, ((i1.outer*1024) + (ic.outer*16)), 16, 16, 64, 0, 0, 0, 0, 0, 1)
-        vta.coproc_dep_push(1, 2)
+    PrimFunc([data, weight, res]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [res_gem] storage_scope = "local.acc_buffer"
+      // attr [data_buf] storage_scope = "local.inp_buffer"
+      // attr [weight_buf] storage_scope = "local.wgt_buffer"
+      vta.coproc_dep_push(3, 2)
+      for (i1.outer, 0, 4) {
         // attr [iter_var(vta, , vta)] coproc_scope = 2
-        vta.coproc_dep_pop(1, 2)
+        vta.coproc_dep_pop(3, 2)
         // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
-        VTAUopLoopBegin(16, 1, 0, 16)
-        VTAUopLoopBegin(16, 0, 1, 1)
-        VTAUopPush(0, 0, 0, 0, 0, 0, 0, 0)
-        VTAUopLoopEnd()
+        VTAUopLoopBegin(16, 1, 0, 0)
+        VTAUopPush(0, 1, 0, 0, 0, 0, 0, 0)
         VTAUopLoopEnd()
         vta.coproc_dep_push(2, 1)
+        for (ic.outer, 0, 4) {
+          // attr [iter_var(vta, , vta)] coproc_scope = 1
+          vta.coproc_dep_pop(2, 1)
+          VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), data, (ic.outer*16), 16, 1, 16, 0, 0, 0, 0, 0, 2)
+          VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), weight, ((i1.outer*1024) + (ic.outer*16)), 16, 16, 64, 0, 0, 0, 0, 0, 1)
+          vta.coproc_dep_push(1, 2)
+          // attr [iter_var(vta, , vta)] coproc_scope = 2
+          vta.coproc_dep_pop(1, 2)
+          // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushGEMMOp"
+          VTAUopLoopBegin(16, 1, 0, 16)
+          VTAUopLoopBegin(16, 0, 1, 1)
+          VTAUopPush(0, 0, 0, 0, 0, 0, 0, 0)
+          VTAUopLoopEnd()
+          VTAUopLoopEnd()
+          vta.coproc_dep_push(2, 1)
+        }
+        vta.coproc_dep_pop(2, 1)
+        // attr [iter_var(vta, , vta)] coproc_scope = 2
+        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
+        VTAUopLoopBegin(16, 1, 1, 0)
+        VTAUopPush(1, 0, 0, 0, 0, 3, 1, 8)
+        VTAUopLoopEnd()
+        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
+        VTAUopLoopBegin(16, 1, 1, 0)
+        VTAUopPush(1, 0, 0, 0, 0, 1, 1, 0)
+        VTAUopLoopEnd()
+        // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
+        VTAUopLoopBegin(16, 1, 1, 0)
+        VTAUopPush(1, 0, 0, 0, 0, 0, 1, 127)
+        VTAUopLoopEnd()
+        vta.coproc_dep_push(2, 3)
+        // attr [iter_var(vta, , vta)] coproc_scope = 3
+        vta.coproc_dep_pop(2, 3)
+        for (i1.inner, 0, 16) {
+          VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), i1.inner, 4, res, ((i1.outer*16) + i1.inner), 1, 1, 1)
+        }
+        vta.coproc_dep_push(3, 2)
       }
-      vta.coproc_dep_pop(2, 1)
-      // attr [iter_var(vta, , vta)] coproc_scope = 2
-      // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
-      VTAUopLoopBegin(16, 1, 1, 0)
-      VTAUopPush(1, 0, 0, 0, 0, 3, 1, 8)
-      VTAUopLoopEnd()
-      // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
-      VTAUopLoopBegin(16, 1, 1, 0)
-      VTAUopPush(1, 0, 0, 0, 0, 1, 1, 0)
-      VTAUopLoopEnd()
-      // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
-      VTAUopLoopBegin(16, 1, 1, 0)
-      VTAUopPush(1, 0, 0, 0, 0, 0, 1, 127)
-      VTAUopLoopEnd()
-      vta.coproc_dep_push(2, 3)
-      // attr [iter_var(vta, , vta)] coproc_scope = 3
-      vta.coproc_dep_pop(2, 3)
-      for (i1.inner, 0, 16) {
-        VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), i1.inner, 4, res, ((i1.outer*16) + i1.inner), 1, 1, 1)
-      }
-      vta.coproc_dep_push(3, 2)
+      vta.coproc_sync()
+      vta.coproc_dep_pop(3, 2)
     }
-    vta.coproc_sync()
-    vta.coproc_dep_pop(3, 2)
+
 
 
 
diff --git a/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
index 5538231..bdd8817 100644
--- a/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:02.898** total execution time for **vta_tutorials_optimize** files:
+**00:03.308** total execution time for **vta_tutorials_optimize** files:
 
-- **00:02.520**: :ref:`sphx_glr_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
-- **00:00.378**: :ref:`sphx_glr_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
+- **00:02.917**: :ref:`sphx_glr_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
+- **00:00.391**: :ref:`sphx_glr_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
diff --git a/docs/_sources/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
index 27e07b9..221431e 100644
--- a/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:00.704** total execution time for **vta_tutorials** files:
+**00:00.705** total execution time for **vta_tutorials** files:
 
-- **00:00.354**: :ref:`sphx_glr_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
-- **00:00.350**: :ref:`sphx_glr_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
+- **00:00.358**: :ref:`sphx_glr_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
+- **00:00.347**: :ref:`sphx_glr_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
diff --git a/docs/_sources/vta/tutorials/vta_get_started.rst.txt b/docs/_sources/vta/tutorials/vta_get_started.rst.txt
index ba20b7e..e0694ef 100644
--- a/docs/_sources/vta/tutorials/vta_get_started.rst.txt
+++ b/docs/_sources/vta/tutorials/vta_get_started.rst.txt
@@ -298,34 +298,37 @@ After we construct the schedule, by default the schedule computes
 
  .. code-block:: none
 
-    // attr [A_buf] storage_scope = "global"
-    allocate A_buf[int32 * 1024]
-    // attr [B_buf] storage_scope = "global"
-    allocate B_buf[int32 * 1024]
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        A_buf[((i1*16) + i3)] = A[((i1*16) + i3)]
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [A_buf] storage_scope = "global"
+      allocate A_buf[int32 * 1024]
+      // attr [B_buf] storage_scope = "global"
+      allocate B_buf[int32 * 1024]
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          A_buf[((i1*16) + i3)] = A[((i1*16) + i3)]
+        }
       }
-    }
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        B_buf[((i1*16) + i3)] = B[((i1*16) + i3)]
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          B_buf[((i1*16) + i3)] = B[((i1*16) + i3)]
+        }
       }
-    }
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        A_buf[((i1*16) + i3)] = (A_buf[((i1*16) + i3)] + B_buf[((i1*16) + i3)])
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          A_buf[((i1*16) + i3)] = (A_buf[((i1*16) + i3)] + B_buf[((i1*16) + i3)])
+        }
       }
-    }
-    for (i1, 0, 64) {
-      for (i3, 0, 16) {
-        C[((i1*16) + i3)] = int8(A_buf[((i1*16) + i3)])
+      for (i1, 0, 64) {
+        for (i3, 0, 16) {
+          C[((i1*16) + i3)] = int8(A_buf[((i1*16) + i3)])
+        }
       }
     }
 
 
 
 
+
 Although this schedule makes sense, it won't compile to VTA.
 In order to obtain correct code generation, we need to apply scheduling
 primitives and code annotation that will transform the schedule into
@@ -412,19 +415,22 @@ with an :code:`env.alu` pragma.
 
  .. code-block:: none
 
-    // attr [A_buf] storage_scope = "local.acc_buffer"
-    // attr [iter_var(vta, , vta)] coproc_scope = 2
-    VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), A, 0, 64, 1, 64, 0, 0, 0, 0, 0, 3)
-    VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), B, 0, 64, 1, 64, 0, 0, 0, 0, 64, 3)
-    // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
-    VTAUopLoopBegin(64, 1, 1, 0)
-    VTAUopPush(1, 0, 0, 64, 0, 2, 0, 0)
-    VTAUopLoopEnd()
-    vta.coproc_dep_push(2, 3)
-    // attr [iter_var(vta, , vta)] coproc_scope = 3
-    vta.coproc_dep_pop(2, 3)
-    VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), 0, 4, C, 0, 64, 1, 64)
-    vta.coproc_sync()
+    PrimFunc([A, B, C]) attrs={"tir.noalias": (bool)1, "global_symbol": "main"} {
+      // attr [A_buf] storage_scope = "local.acc_buffer"
+      // attr [iter_var(vta, , vta)] coproc_scope = 2
+      VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), A, 0, 64, 1, 64, 0, 0, 0, 0, 0, 3)
+      VTALoadBuffer2D(tvm_thread_context(VTATLSCommandHandle()), B, 0, 64, 1, 64, 0, 0, 0, 0, 64, 3)
+      // attr [iter_var(vta, , vta)] coproc_uop_scope = "VTAPushALUOp"
+      VTAUopLoopBegin(64, 1, 1, 0)
+      VTAUopPush(1, 0, 0, 64, 0, 2, 0, 0)
+      VTAUopLoopEnd()
+      vta.coproc_dep_push(2, 3)
+      // attr [iter_var(vta, , vta)] coproc_scope = 3
+      vta.coproc_dep_pop(2, 3)
+      VTAStoreBuffer2D(tvm_thread_context(VTATLSCommandHandle()), 0, 4, C, 0, 64, 1, 64)
+      vta.coproc_sync()
+    }
+
 
 
 
diff --git a/docs/api/python/driver.html b/docs/api/python/driver.html
index 0dce6c4..c14211e 100644
--- a/docs/api/python/driver.html
+++ b/docs/api/python/driver.html
@@ -213,7 +213,7 @@
 <p>Namespace for driver APIs</p>
 <dl class="function">
 <dt id="tvm.lower">
-<code class="sig-prename descclassname">tvm.</code><code class="sig-name descname">lower</code><span class="sig-paren">(</span><em class="sig-param">sch</em>, <em class="sig-param">args</em>, <em class="sig-param">name='default_function'</em>, <em class="sig-param">binds=None</em>, <em class="sig-param">simple_mode=False</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.lower" title="Permalink to this definition">¶</a></dt>
+<code class="sig-prename descclassname">tvm.</code><code class="sig-name descname">lower</code><span class="sig-paren">(</span><em class="sig-param">sch</em>, <em class="sig-param">args</em>, <em class="sig-param">name='main'</em>, <em class="sig-param">binds=None</em>, <em class="sig-param">simple_mode=False</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.lower" title="Permalink to this definition">¶</a></dt>
 <dd><p>Lowering step before build into target.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
diff --git a/docs/api/python/ir.html b/docs/api/python/ir.html
index 2a25fae..40a719c 100644
--- a/docs/api/python/ir.html
+++ b/docs/api/python/ir.html
@@ -1579,10 +1579,6 @@ needed.</p>
 <em class="property">class </em><code class="sig-prename descclassname">tvm.transform.</code><code class="sig-name descname">Sequential</code><span class="sig-paren">(</span><em class="sig-param">passes=None</em>, <em class="sig-param">opt_level=2</em>, <em class="sig-param">name='sequential'</em>, <em class="sig-param">required=None</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.transform.Sequential" title="Permalink to this definition">¶</a></dt>
 <dd><p>A pass that works on a sequence of pass objects. Multiple passes can be
 executed sequentially using this class.</p>
-<p>Some typical usage of the sequential pass are:
-1. Users provide a list of passes for optimization.
-2. Only an optimization level is provided so that the backend system has
-to glob all passes at this level and below to perform the optimizations.</p>
 <p>Note that users can also provide a series of passes that they don’t want to
 apply when running a sequential pass. Pass dependency will be resolved in
 the backend as well.</p>
@@ -1590,7 +1586,10 @@ the backend as well.</p>
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><ul class="simple">
 <li><p><strong>passes</strong> (<em>Optional</em><em>[</em><em>List</em><em>[</em><a class="reference internal" href="#tvm.transform.Pass" title="tvm.transform.Pass"><em>Pass</em></a><em>]</em><em>]</em>) – A sequence of passes candidate for optimization.</p></li>
-<li><p><strong>opt_level</strong> (<em>Optional</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a><em>]</em>) – The optimization level of this sequential pass.</p></li>
+<li><p><strong>opt_level</strong> (<em>Optional</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a><em>]</em>) – The optimization level of this sequential pass.
+The opt_level of a default sequential pass is set to 0.
+Note that some of the passes within the Sequential may still not be executed
+if their opt_level is higher than the provided opt_level.</p></li>
 <li><p><strong>name</strong> (<em>Optional</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em>]</em>) – The name of the sequential pass.</p></li>
 <li><p><strong>required</strong> (<em>Optional</em><em>[</em><em>List</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em>]</em><em>]</em>) – The list of passes that the sequential pass is dependent on.</p></li>
 </ul>
diff --git a/docs/api/python/relay/analysis.html b/docs/api/python/relay/analysis.html
index 81acc3d..596ea4f 100644
--- a/docs/api/python/relay/analysis.html
+++ b/docs/api/python/relay/analysis.html
@@ -278,10 +278,13 @@
 <tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.analysis.post_order_visit" title="tvm.relay.analysis.post_order_visit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">post_order_visit</span></code></a>(expr, fvisit)</p></td>
 <td><p>Recursively visit the ir in post DFS order node, apply fvisit.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.analysis.unmatched_cases" title="tvm.relay.analysis.unmatched_cases"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unmatched_cases</span></code></a>(match[, mod])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.analysis.search_fc_transpose" title="tvm.relay.analysis.search_fc_transpose"><code class="xref py py-obj docutils literal notranslate"><span class="pre">search_fc_transpose</span></code></a>(expr)</p></td>
+<td><p>Search fc weight name in the pattern: y = nn.dense(x, transpose(w, [1, 0]))</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.analysis.unmatched_cases" title="tvm.relay.analysis.unmatched_cases"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unmatched_cases</span></code></a>(match[, mod])</p></td>
 <td><p>Finds cases that the match expression does not catch, if any.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.analysis.well_formed" title="tvm.relay.analysis.well_formed"><code class="xref py py-obj docutils literal notranslate"><span class="pre">well_formed</span></code></a>(expr)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.analysis.well_formed" title="tvm.relay.analysis.well_formed"><code class="xref py py-obj docutils literal notranslate"><span class="pre">well_formed</span></code></a>(expr)</p></td>
 <td><p>Check that each Var is only bound once (well formed).</p></td>
 </tr>
 </tbody>
@@ -751,6 +754,24 @@ only once.</p>
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.relay.analysis.search_fc_transpose">
+<code class="sig-prename descclassname">tvm.relay.analysis.</code><code class="sig-name descname">search_fc_transpose</code><span class="sig-paren">(</span><em class="sig-param">expr</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.analysis.search_fc_transpose" title="Permalink to this definition">¶</a></dt>
+<dd><p>Search fc weight name in the pattern: y = nn.dense(x, transpose(w, [1, 0]))</p>
+<p>This function is used in the data_dep_optimization.simplify_fc_transpose method</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><p><strong>expr</strong> (<em>tvm.relay.Expr</em>) – </p>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>ret</strong> – Array of weight variable name in pattern y = nn.dense(x, transpose(w, [1, 0]))</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="../ir.html#tvm.ir.Array" title="tvm.ir.Array">Array</a>[<a class="reference internal" href="../runtime.html#tvm.runtime.String" title="tvm.runtime.String">String</a>]</p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.relay.analysis.unmatched_cases">
 <code class="sig-prename descclassname">tvm.relay.analysis.</code><code class="sig-name descname">unmatched_cases</code><span class="sig-paren">(</span><em class="sig-param">match</em>, <em class="sig-param">mod=None</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.analysis.unmatched_cases" title="Permalink to this definition">¶</a></dt>
 <dd><p>Finds cases that the match expression does not catch, if any.</p>
diff --git a/docs/api/python/relay/frontend.html b/docs/api/python/relay/frontend.html
index 077caa1..7a06382 100644
--- a/docs/api/python/relay/frontend.html
+++ b/docs/api/python/relay/frontend.html
@@ -220,45 +220,33 @@ for Relay.</p>
 <col style="width: 90%" />
 </colgroup>
 <tbody>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.dequantize_mxnet_min_max" title="tvm.relay.frontend.dequantize_mxnet_min_max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dequantize_mxnet_min_max</span></code></a>(data, min_range, …)</p></td>
-<td><p>Dequantizes the given <cite>data</cite> in {int8 or uint8} and the given min and max ranges.</p></td>
-</tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_caffe2" title="tvm.relay.frontend.from_caffe2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_caffe2</span></code></a>(init_net, predict_net[, shape, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_caffe2" title="tvm.relay.frontend.from_caffe2"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_caffe2</span></code></a>(init_net, predict_net[, shape, …])</p></td>
 <td><p>Load caffe2 graph which contains init_net and predict_net into Relay Function.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_coreml" title="tvm.relay.frontend.from_coreml"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_coreml</span></code></a>(model[, shape])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_coreml" title="tvm.relay.frontend.from_coreml"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_coreml</span></code></a>(model[, shape])</p></td>
 <td><p>Convert from coreml model into Relay Function.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_darknet" title="tvm.relay.frontend.from_darknet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_darknet</span></code></a>(net[, shape, dtype])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_darknet" title="tvm.relay.frontend.from_darknet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_darknet</span></code></a>(net[, shape, dtype])</p></td>
 <td><p>Convert from Darknet’s model into compatible relay Function.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_keras" title="tvm.relay.frontend.from_keras"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_keras</span></code></a>(model[, shape, layout])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_keras" title="tvm.relay.frontend.from_keras"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_keras</span></code></a>(model[, shape, layout])</p></td>
 <td><p>Convert keras model to relay Function.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_mxnet" title="tvm.relay.frontend.from_mxnet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_mxnet</span></code></a>(symbol[, shape, dtype, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_mxnet" title="tvm.relay.frontend.from_mxnet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_mxnet</span></code></a>(symbol[, shape, dtype, …])</p></td>
 <td><p>Convert from MXNet”s model into compatible relay Function.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_onnx" title="tvm.relay.frontend.from_onnx"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_onnx</span></code></a>(model[, shape, dtype, opset])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_onnx" title="tvm.relay.frontend.from_onnx"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_onnx</span></code></a>(model[, shape, dtype, opset])</p></td>
 <td><p>Convert a ONNX model into an equivalent Relay Function.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_pytorch" title="tvm.relay.frontend.from_pytorch"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_pytorch</span></code></a>(script_module, input_shapes[, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_pytorch" title="tvm.relay.frontend.from_pytorch"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_pytorch</span></code></a>(script_module, input_shapes[, …])</p></td>
 <td><p>Load PyTorch model in the form of a scripted PyTorch model and convert into relay.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_tensorflow" title="tvm.relay.frontend.from_tensorflow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_tensorflow</span></code></a>(graph[, layout, shape, outputs])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_tensorflow" title="tvm.relay.frontend.from_tensorflow"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_tensorflow</span></code></a>(graph[, layout, shape, outputs])</p></td>
 <td><p>Load tensorflow graph which is a python tensorflow graph object into relay.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_tflite" title="tvm.relay.frontend.from_tflite"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_tflite</span></code></a>(model, shape_dict, dtype_dict)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.from_tflite" title="tvm.relay.frontend.from_tflite"><code class="xref py py-obj docutils literal notranslate"><span class="pre">from_tflite</span></code></a>(model, shape_dict, dtype_dict)</p></td>
 <td><p>Convert from tflite model into compatible relay Function.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.get_mkldnn_int8_scale" title="tvm.relay.frontend.get_mkldnn_int8_scale"><code class="xref py py-obj docutils literal notranslate"><span class="pre">get_mkldnn_int8_scale</span></code></a>(range_min, range_max)</p></td>
-<td><p>Computes the quantization scale using MKLDNN specifications with the given range.</p></td>
-</tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.frontend.get_mkldnn_uint8_scale" title="tvm.relay.frontend.get_mkldnn_uint8_scale"><code class="xref py py-obj docutils literal notranslate"><span class="pre">get_mkldnn_uint8_scale</span></code></a>(range_min, range_max)</p></td>
-<td><p>Computes the quantization scale using MKLDNN specifications with the given range.</p></td>
-</tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.frontend.quantize_mxnet_min_max" title="tvm.relay.frontend.quantize_mxnet_min_max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">quantize_mxnet_min_max</span></code></a>(data, min_range, …)</p></td>
-<td><p>Quantizes the given <cite>data</cite> in float32 and the given min and max ranges and the output data type.</p></td>
-</tr>
 </tbody>
 </table>
 <dl class="function">
@@ -286,110 +274,6 @@ for Relay.</p>
 </dd></dl>
 
 <dl class="function">
-<dt id="tvm.relay.frontend.dequantize_mxnet_min_max">
-<code class="sig-prename descclassname">tvm.relay.frontend.</code><code class="sig-name descname">dequantize_mxnet_min_max</code><span class="sig-paren">(</span><em class="sig-param">data</em>, <em class="sig-param">min_range</em>, <em class="sig-param">max_range</em>, <em class="sig-param">in_dtype='int8'</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.frontend.dequantize_mxnet_min_max" title="Permalink to this definition">¶</a></dt>
-<dd><p>Dequantizes the given <cite>data</cite> in {int8 or uint8} and the given
-min and max ranges. The output data type is float32.
-Only <cite>float32</cite> is supported as output data types.
-The input data type is expected to be {int8 or uint8}.
-Mxnet has two different flavors for dequantization 1) Default 2)MKLDNN.
-To get the second one Mxnet must be built with MKLDNN during compile time.
-Users can choose either of the implementation for TVM runtime.
-The main difference between the two implementation is that MKLDNN is centered
-around 0 and the default implementation for uint8 is not.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>data</strong> (<em>tvm.relay.Expr</em>) – The input tensor to be quantized. Can be of type float32.</p></li>
-<li><p><strong>min_range</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.8)"><em>float</em></a>) – The minimum to use data elements for the output.</p></li>
-<li><p><strong>max_range</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.8)"><em>float</em></a>) – The maximum to use for data elements for the output.</p></li>
-<li><p><strong>in_dtype</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em>, </em><em>optional</em>) – The input data type, can be ‘int8’ or ‘uint8’</p></li>
-</ul>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p><strong>result</strong> – The computed result.</p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>tvm.relay.Expr</p>
-</dd>
-</dl>
-</dd></dl>
-
-<dl class="function">
-<dt id="tvm.relay.frontend.quantize_mxnet_min_max">
-<code class="sig-prename descclassname">tvm.relay.frontend.</code><code class="sig-name descname">quantize_mxnet_min_max</code><span class="sig-paren">(</span><em class="sig-param">data</em>, <em class="sig-param">min_range</em>, <em class="sig-param">max_range</em>, <em class="sig-param">out_dtype='int8'</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.frontend.quantize_mxnet_min_max" title="Permalink to this definition">¶</a></dt>
-<dd><p>Quantizes the given <cite>data</cite> in float32 and the given
-min and max ranges and the output data type.
-Only <cite>int8</cite> and <cite>uint8</cite> is supported as output data types.
-The input data type is expected to be <cite>float32</cite>.
-Mxnet has two different flavors for quantization 1) Default 2)MKLDNN.
-To get the second one Mxnet must be built with MKLDNN during compile time.
-Users can choose either of the implementation for TVM runtime.
-The main difference between the two implementation is that MKLDNN is centered
-around 0 and the default implementation for uint8 is not.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>data</strong> (<em>tvm.relay.Expr</em>) – The input tensor to be quantized. Can be of type float32.</p></li>
-<li><p><strong>min_range</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.8)"><em>float</em></a>) – The minimum to use data elements.</p></li>
-<li><p><strong>max_range</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.8)"><em>float</em></a>) – The maximum to use for data elements.</p></li>
-<li><p><strong>out_dtype</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a><em>, </em><em>optional</em>) – The output data type, can be ‘int8’ or ‘uint8’</p></li>
-</ul>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p><strong>result</strong> – The computed result.</p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>tvm.relay.Expr</p>
-</dd>
-</dl>
-</dd></dl>
-
-<dl class="function">
-<dt id="tvm.relay.frontend.get_mkldnn_int8_scale">
-<code class="sig-prename descclassname">tvm.relay.frontend.</code><code class="sig-name descname">get_mkldnn_int8_scale</code><span class="sig-paren">(</span><em class="sig-param">range_min</em>, <em class="sig-param">range_max</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.frontend.get_mkldnn_int8_scale" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the quantization scale using MKLDNN specifications
-with the given range. The output datatype of tensor to be quantized should be
-int8.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>range_min</strong> (<em>float32</em>) – A number representing the lower end of the tensor to be quantized.</p></li>
-<li><p><strong>range_max</strong> (<em>float32</em>) – A number representing the upper end of the tensor to be quantized.</p></li>
-</ul>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p><strong>scale</strong></p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>A float32 number which acts as the scale for quantization.</p>
-</dd>
-</dl>
-</dd></dl>
-
-<dl class="function">
-<dt id="tvm.relay.frontend.get_mkldnn_uint8_scale">
-<code class="sig-prename descclassname">tvm.relay.frontend.</code><code class="sig-name descname">get_mkldnn_uint8_scale</code><span class="sig-paren">(</span><em class="sig-param">range_min</em>, <em class="sig-param">range_max</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.frontend.get_mkldnn_uint8_scale" title="Permalink to this definition">¶</a></dt>
-<dd><p>Computes the quantization scale using MKLDNN specifications
-with the given range. The output datatype of tensor to be quantized should be
-uint8.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>range_min</strong> (<em>float32</em>) – A number representing the lower end of the tensor to be quantized.</p></li>
-<li><p><strong>range_max</strong> (<em>float32</em>) – A number representing the upper end of the tensor to be quantized.</p></li>
-</ul>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p><strong>scale</strong></p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>A float32 number which acts as the scale for quantization.</p>
-</dd>
-</dl>
-</dd></dl>
-
-<dl class="function">
 <dt id="tvm.relay.frontend.from_keras">
 <code class="sig-prename descclassname">tvm.relay.frontend.</code><code class="sig-name descname">from_keras</code><span class="sig-paren">(</span><em class="sig-param">model</em>, <em class="sig-param">shape=None</em>, <em class="sig-param">layout='NCHW'</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.frontend.from_keras" title="Permalink to this definition">¶</a></dt>
 <dd><p>Convert keras model to relay Function.</p>
diff --git a/docs/api/python/relay/index.html b/docs/api/python/relay/index.html
index 7bb30b1..e8d75a3 100644
--- a/docs/api/python/relay/index.html
+++ b/docs/api/python/relay/index.html
@@ -482,178 +482,181 @@
 <tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.logical_or" title="tvm.relay.logical_or"><code class="xref py py-obj docutils literal notranslate"><span class="pre">logical_or</span></code></a>(lhs, rhs)</p></td>
 <td><p>logical OR with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.max" title="tvm.relay.max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.logical_xor" title="tvm.relay.logical_xor"><code class="xref py py-obj docutils literal notranslate"><span class="pre">logical_xor</span></code></a>(lhs, rhs)</p></td>
+<td><p>logical XOR with numpy-style broadcasting.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.max" title="tvm.relay.max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the max of array elements over given axes.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.maximum" title="tvm.relay.maximum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">maximum</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.maximum" title="tvm.relay.maximum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">maximum</span></code></a>(lhs, rhs)</p></td>
 <td><p>Maximum with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.mean" title="tvm.relay.mean"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mean</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.mean" title="tvm.relay.mean"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mean</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the mean of array elements over given axes.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.mean_std" title="tvm.relay.mean_std"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mean_std</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.mean_std" title="tvm.relay.mean_std"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mean_std</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the mean and standard deviation of data over given axes.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.mean_variance" title="tvm.relay.mean_variance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mean_variance</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.mean_variance" title="tvm.relay.mean_variance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mean_variance</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the mean and variance of data over given axes.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.min" title="tvm.relay.min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">min</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.min" title="tvm.relay.min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">min</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the min of array elements over given axes.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.minimum" title="tvm.relay.minimum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">minimum</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.minimum" title="tvm.relay.minimum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">minimum</span></code></a>(lhs, rhs)</p></td>
 <td><p>Minimum with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.mod" title="tvm.relay.mod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mod</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.mod" title="tvm.relay.mod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mod</span></code></a>(lhs, rhs)</p></td>
 <td><p>Mod with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.multiply" title="tvm.relay.multiply"><code class="xref py py-obj docutils literal notranslate"><span class="pre">multiply</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.multiply" title="tvm.relay.multiply"><code class="xref py py-obj docutils literal notranslate"><span class="pre">multiply</span></code></a>(lhs, rhs)</p></td>
 <td><p>Multiplication with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.ndarray_size" title="tvm.relay.ndarray_size"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ndarray_size</span></code></a>(data[, dtype])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.ndarray_size" title="tvm.relay.ndarray_size"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ndarray_size</span></code></a>(data[, dtype])</p></td>
 <td><p>Get number of elements of input tensor.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.negative" title="tvm.relay.negative"><code class="xref py py-obj docutils literal notranslate"><span class="pre">negative</span></code></a>(data)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.negative" title="tvm.relay.negative"><code class="xref py py-obj docutils literal notranslate"><span class="pre">negative</span></code></a>(data)</p></td>
 <td><p>Compute element-wise negative of data.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.not_equal" title="tvm.relay.not_equal"><code class="xref py py-obj docutils literal notranslate"><span class="pre">not_equal</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.not_equal" title="tvm.relay.not_equal"><code class="xref py py-obj docutils literal notranslate"><span class="pre">not_equal</span></code></a>(lhs, rhs)</p></td>
 <td><p>Broadcasted elementwise test for (lhs != rhs).</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.one_hot" title="tvm.relay.one_hot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">one_hot</span></code></a>(indices, on_value, off_value, depth, …)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.one_hot" title="tvm.relay.one_hot"><code class="xref py py-obj docutils literal notranslate"><span class="pre">one_hot</span></code></a>(indices, on_value, off_value, depth, …)</p></td>
 <td><p>Returns a one-hot tensor where the locations represented by indices take value on_value, other locations take value off_value.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.ones" title="tvm.relay.ones"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ones</span></code></a>(shape, dtype)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.ones" title="tvm.relay.ones"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ones</span></code></a>(shape, dtype)</p></td>
 <td><p>Fill array with ones.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.ones_like" title="tvm.relay.ones_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ones_like</span></code></a>(data)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.ones_like" title="tvm.relay.ones_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ones_like</span></code></a>(data)</p></td>
 <td><p>Returns an array of ones, with same type and shape as the input.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.optimize" title="tvm.relay.optimize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">optimize</span></code></a>(mod[, target, params])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.optimize" title="tvm.relay.optimize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">optimize</span></code></a>(mod[, target, params])</p></td>
 <td><p>Helper function that optimizes a Relay module.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.power" title="tvm.relay.power"><code class="xref py py-obj docutils literal notranslate"><span class="pre">power</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.power" title="tvm.relay.power"><code class="xref py py-obj docutils literal notranslate"><span class="pre">power</span></code></a>(lhs, rhs)</p></td>
 <td><p>Power with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.prod" title="tvm.relay.prod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">prod</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.prod" title="tvm.relay.prod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">prod</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the products of array elements over given axes.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.reinterpret" title="tvm.relay.reinterpret"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reinterpret</span></code></a>(data, dtype)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.reinterpret" title="tvm.relay.reinterpret"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reinterpret</span></code></a>(data, dtype)</p></td>
 <td><p>Reinterpret input tensor to data type.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.repeat" title="tvm.relay.repeat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">repeat</span></code></a>(data, repeats, axis)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.repeat" title="tvm.relay.repeat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">repeat</span></code></a>(data, repeats, axis)</p></td>
 <td><p>Repeats elements of an array.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.reshape" title="tvm.relay.reshape"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reshape</span></code></a>(data, newshape)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.reshape" title="tvm.relay.reshape"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reshape</span></code></a>(data, newshape)</p></td>
 <td><p>Reshape the input array.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.reshape_like" title="tvm.relay.reshape_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reshape_like</span></code></a>(data, shape_like)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.reshape_like" title="tvm.relay.reshape_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reshape_like</span></code></a>(data, shape_like)</p></td>
 <td><p>Reshapes the input array by the size of another array.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.reverse" title="tvm.relay.reverse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reverse</span></code></a>(data, axis)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.reverse" title="tvm.relay.reverse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reverse</span></code></a>(data, axis)</p></td>
 <td><p>Reverses the order of elements along given axis while preserving array shape.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.reverse_reshape" title="tvm.relay.reverse_reshape"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reverse_reshape</span></code></a>(data, newshape)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.reverse_reshape" title="tvm.relay.reverse_reshape"><code class="xref py py-obj docutils literal notranslate"><span class="pre">reverse_reshape</span></code></a>(data, newshape)</p></td>
 <td><p>Reshapes the input array where the special values are inferred from right to left.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.right_shift" title="tvm.relay.right_shift"><code class="xref py py-obj docutils literal notranslate"><span class="pre">right_shift</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.right_shift" title="tvm.relay.right_shift"><code class="xref py py-obj docutils literal notranslate"><span class="pre">right_shift</span></code></a>(lhs, rhs)</p></td>
 <td><p>Right shift with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.round" title="tvm.relay.round"><code class="xref py py-obj docutils literal notranslate"><span class="pre">round</span></code></a>(data)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.round" title="tvm.relay.round"><code class="xref py py-obj docutils literal notranslate"><span class="pre">round</span></code></a>(data)</p></td>
 <td><p>Compute element-wise round of data.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.rsqrt" title="tvm.relay.rsqrt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rsqrt</span></code></a>(data)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.rsqrt" title="tvm.relay.rsqrt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">rsqrt</span></code></a>(data)</p></td>
 <td><p>Compute elementwise rsqrt of data.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.save_param_dict" title="tvm.relay.save_param_dict"><code class="xref py py-obj docutils literal notranslate"><span class="pre">save_param_dict</span></code></a>(params)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.save_param_dict" title="tvm.relay.save_param_dict"><code class="xref py py-obj docutils literal notranslate"><span class="pre">save_param_dict</span></code></a>(params)</p></td>
 <td><p>Save parameter dictionary to binary bytes.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.scalar_type" title="tvm.relay.scalar_type"><code class="xref py py-obj docutils literal notranslate"><span class="pre">scalar_type</span></code></a>(dtype)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.scalar_type" title="tvm.relay.scalar_type"><code class="xref py py-obj docutils literal notranslate"><span class="pre">scalar_type</span></code></a>(dtype)</p></td>
 <td><p>Creates a scalar type.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.sequence_mask" title="tvm.relay.sequence_mask"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sequence_mask</span></code></a>(data, valid_length[, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.sequence_mask" title="tvm.relay.sequence_mask"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sequence_mask</span></code></a>(data, valid_length[, …])</p></td>
 <td><p>Sets all elements outside the expected length of the sequence to a constant value.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.setrecursionlimit" title="tvm.relay.setrecursionlimit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setrecursionlimit</span></code></a>(n)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.setrecursionlimit" title="tvm.relay.setrecursionlimit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setrecursionlimit</span></code></a>(n)</p></td>
 <td><p>Set the maximum depth of the Python interpreter stack to n.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.shape_of" title="tvm.relay.shape_of"><code class="xref py py-obj docutils literal notranslate"><span class="pre">shape_of</span></code></a>(data[, dtype])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.shape_of" title="tvm.relay.shape_of"><code class="xref py py-obj docutils literal notranslate"><span class="pre">shape_of</span></code></a>(data[, dtype])</p></td>
 <td><p>Get shape of a tensor.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.sigmoid" title="tvm.relay.sigmoid"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sigmoid</span></code></a>(data)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.sigmoid" title="tvm.relay.sigmoid"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sigmoid</span></code></a>(data)</p></td>
 <td><p>Compute elementwise sigmoid of data.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.sign" title="tvm.relay.sign"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sign</span></code></a>(data)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.sign" title="tvm.relay.sign"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sign</span></code></a>(data)</p></td>
 <td><p>Compute element-wise sign of data.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.sin" title="tvm.relay.sin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sin</span></code></a>(data)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.sin" title="tvm.relay.sin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sin</span></code></a>(data)</p></td>
 <td><p>Compute elementwise sin of data.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.slice_like" title="tvm.relay.slice_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">slice_like</span></code></a>(data, shape_like[, axes])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.slice_like" title="tvm.relay.slice_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">slice_like</span></code></a>(data, shape_like[, axes])</p></td>
 <td><p>Slice the first input with respect to the second input.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.split" title="tvm.relay.split"><code class="xref py py-obj docutils literal notranslate"><span class="pre">split</span></code></a>(data, indices_or_sections[, axis])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.split" title="tvm.relay.split"><code class="xref py py-obj docutils literal notranslate"><span class="pre">split</span></code></a>(data, indices_or_sections[, axis])</p></td>
 <td><p>Split input tensor along axis by sections or indices.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.sqrt" title="tvm.relay.sqrt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sqrt</span></code></a>(data)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.sqrt" title="tvm.relay.sqrt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sqrt</span></code></a>(data)</p></td>
 <td><p>Compute elementwise sqrt of data.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.squeeze" title="tvm.relay.squeeze"><code class="xref py py-obj docutils literal notranslate"><span class="pre">squeeze</span></code></a>(data[, axis])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.squeeze" title="tvm.relay.squeeze"><code class="xref py py-obj docutils literal notranslate"><span class="pre">squeeze</span></code></a>(data[, axis])</p></td>
 <td><p>Squeeze axes in the array.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.stack" title="tvm.relay.stack"><code class="xref py py-obj docutils literal notranslate"><span class="pre">stack</span></code></a>(data, axis)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.stack" title="tvm.relay.stack"><code class="xref py py-obj docutils literal notranslate"><span class="pre">stack</span></code></a>(data, axis)</p></td>
 <td><p>Join a sequence of arrays along a new axis.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.std" title="tvm.relay.std"><code class="xref py py-obj docutils literal notranslate"><span class="pre">std</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.std" title="tvm.relay.std"><code class="xref py py-obj docutils literal notranslate"><span class="pre">std</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the standard deviation of data over given axes.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.strided_set" title="tvm.relay.strided_set"><code class="xref py py-obj docutils literal notranslate"><span class="pre">strided_set</span></code></a>(data, v, begin, end[, strides])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.strided_set" title="tvm.relay.strided_set"><code class="xref py py-obj docutils literal notranslate"><span class="pre">strided_set</span></code></a>(data, v, begin, end[, strides])</p></td>
 <td><p>Strided set of an array.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.strided_slice" title="tvm.relay.strided_slice"><code class="xref py py-obj docutils literal notranslate"><span class="pre">strided_slice</span></code></a>(data, begin, end[, strides])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.strided_slice" title="tvm.relay.strided_slice"><code class="xref py py-obj docutils literal notranslate"><span class="pre">strided_slice</span></code></a>(data, begin, end[, strides])</p></td>
 <td><p>Strided slice of an array.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.subtract" title="tvm.relay.subtract"><code class="xref py py-obj docutils literal notranslate"><span class="pre">subtract</span></code></a>(lhs, rhs)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.subtract" title="tvm.relay.subtract"><code class="xref py py-obj docutils literal notranslate"><span class="pre">subtract</span></code></a>(lhs, rhs)</p></td>
 <td><p>Subtraction with numpy-style broadcasting.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.sum" title="tvm.relay.sum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sum</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.sum" title="tvm.relay.sum"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sum</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the sum of array elements over given axes.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.take" title="tvm.relay.take"><code class="xref py py-obj docutils literal notranslate"><span class="pre">take</span></code></a>(data, indices[, axis, mode])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.take" title="tvm.relay.take"><code class="xref py py-obj docutils literal notranslate"><span class="pre">take</span></code></a>(data, indices[, axis, mode])</p></td>
 <td><p>Take elements from an array along an axis.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.tan" title="tvm.relay.tan"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tan</span></code></a>(data)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.tan" title="tvm.relay.tan"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tan</span></code></a>(data)</p></td>
 <td><p>Compute elementwise tan of data.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.tanh" title="tvm.relay.tanh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tanh</span></code></a>(data)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.tanh" title="tvm.relay.tanh"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tanh</span></code></a>(data)</p></td>
 <td><p>Compute element-wise tanh of data.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.tile" title="tvm.relay.tile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tile</span></code></a>(data, reps)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.tile" title="tvm.relay.tile"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tile</span></code></a>(data, reps)</p></td>
 <td><p>Repeats the whole array multiple times.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.topk" title="tvm.relay.topk"><code class="xref py py-obj docutils literal notranslate"><span class="pre">topk</span></code></a>(data[, k, axis, ret_type, is_ascend, dtype])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.topk" title="tvm.relay.topk"><code class="xref py py-obj docutils literal notranslate"><span class="pre">topk</span></code></a>(data[, k, axis, ret_type, is_ascend, dtype])</p></td>
 <td><p>Get the top k elements in an input tensor along the given axis.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transpose" title="tvm.relay.transpose"><code class="xref py py-obj docutils literal notranslate"><span class="pre">transpose</span></code></a>(data[, axes])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transpose" title="tvm.relay.transpose"><code class="xref py py-obj docutils literal notranslate"><span class="pre">transpose</span></code></a>(data[, axes])</p></td>
 <td><p>Permutes the dimensions of an array.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.trunc" title="tvm.relay.trunc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">trunc</span></code></a>(data)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.trunc" title="tvm.relay.trunc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">trunc</span></code></a>(data)</p></td>
 <td><p>Compute element-wise trunc of data.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.unravel_index" title="tvm.relay.unravel_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unravel_index</span></code></a>(indices, shape)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.unravel_index" title="tvm.relay.unravel_index"><code class="xref py py-obj docutils literal notranslate"><span class="pre">unravel_index</span></code></a>(indices, shape)</p></td>
 <td><p>Convert a flat index or array of flat indices into a tuple of coordinate arrays.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.var" title="tvm.relay.var"><code class="xref py py-obj docutils literal notranslate"><span class="pre">var</span></code></a>(name_hint[, type_annotation, shape, dtype])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.var" title="tvm.relay.var"><code class="xref py py-obj docutils literal notranslate"><span class="pre">var</span></code></a>(name_hint[, type_annotation, shape, dtype])</p></td>
 <td><p>Create a new tvm.relay.Var.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.variance" title="tvm.relay.variance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">variance</span></code></a>(data[, axis, keepdims, exclude])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.variance" title="tvm.relay.variance"><code class="xref py py-obj docutils literal notranslate"><span class="pre">variance</span></code></a>(data[, axis, keepdims, exclude])</p></td>
 <td><p>Computes the variance of data over given axes.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.where" title="tvm.relay.where"><code class="xref py py-obj docutils literal notranslate"><span class="pre">where</span></code></a>(condition, x, y)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.where" title="tvm.relay.where"><code class="xref py py-obj docutils literal notranslate"><span class="pre">where</span></code></a>(condition, x, y)</p></td>
 <td><p>Selecting elements from either x or y depending on the value of the condition.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.zeros" title="tvm.relay.zeros"><code class="xref py py-obj docutils literal notranslate"><span class="pre">zeros</span></code></a>(shape, dtype)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.zeros" title="tvm.relay.zeros"><code class="xref py py-obj docutils literal notranslate"><span class="pre">zeros</span></code></a>(shape, dtype)</p></td>
 <td><p>Fill array with zeros.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.zeros_like" title="tvm.relay.zeros_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">zeros_like</span></code></a>(data)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.zeros_like" title="tvm.relay.zeros_like"><code class="xref py py-obj docutils literal notranslate"><span class="pre">zeros_like</span></code></a>(data)</p></td>
 <td><p>Returns an array of zeros, with same type and shape as the input.</p></td>
 </tr>
 </tbody>
@@ -2522,6 +2525,26 @@ defined by indices.</p>
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.relay.logical_xor">
+<code class="sig-prename descclassname">tvm.relay.</code><code class="sig-name descname">logical_xor</code><span class="sig-paren">(</span><em class="sig-param">lhs</em>, <em class="sig-param">rhs</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.logical_xor" title="Permalink to this definition">¶</a></dt>
+<dd><p>logical XOR with numpy-style broadcasting.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>lhs</strong> (<em>relay.Expr</em>) – The left hand side input data</p></li>
+<li><p><strong>rhs</strong> (<em>relay.Expr</em>) – The right hand side input data</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>result</strong> – The computed result.</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p>relay.Expr</p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.relay.max">
 <code class="sig-prename descclassname">tvm.relay.</code><code class="sig-name descname">max</code><span class="sig-paren">(</span><em class="sig-param">data</em>, <em class="sig-param">axis=None</em>, <em class="sig-param">keepdims=False</em>, <em class="sig-param">exclude=False</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.max" title="Permalink to this definition">¶</a></dt>
 <dd><p>Computes the max of array elements over given axes.</p>
diff --git a/docs/api/python/relay/nn.html b/docs/api/python/relay/nn.html
index 84c0cfb..a1f574f 100644
--- a/docs/api/python/relay/nn.html
+++ b/docs/api/python/relay/nn.html
@@ -335,64 +335,67 @@
 <tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.global_max_pool2d" title="tvm.relay.nn.global_max_pool2d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">global_max_pool2d</span></code></a>(data[, layout])</p></td>
 <td><p>2D global maximum pooling operator.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.instance_norm" title="tvm.relay.nn.instance_norm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">instance_norm</span></code></a>(data, gamma, beta[, axis, …])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.group_norm" title="tvm.relay.nn.group_norm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">group_norm</span></code></a>(data, gamma, beta, num_groups[, …])</p></td>
+<td><p>Group normalization normalizes over a group of channels for each training example.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.instance_norm" title="tvm.relay.nn.instance_norm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">instance_norm</span></code></a>(data, gamma, beta[, axis, …])</p></td>
 <td><p>Instance Normalization (Ulyanov and et al., 2016) Applies instance normalization to the n-dimensional input array.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.l2_normalize" title="tvm.relay.nn.l2_normalize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">l2_normalize</span></code></a>(data, eps[, axis])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.l2_normalize" title="tvm.relay.nn.l2_normalize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">l2_normalize</span></code></a>(data, eps[, axis])</p></td>
 <td><p>Perform L2 normalization on the input data</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.layer_norm" title="tvm.relay.nn.layer_norm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">layer_norm</span></code></a>(data, gamma, beta[, axis, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.layer_norm" title="tvm.relay.nn.layer_norm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">layer_norm</span></code></a>(data, gamma, beta[, axis, …])</p></td>
 <td><p>Layer normalization (Lei Ba and et al., 2016).</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.leaky_relu" title="tvm.relay.nn.leaky_relu"><code class="xref py py-obj docutils literal notranslate"><span class="pre">leaky_relu</span></code></a>(data, alpha)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.leaky_relu" title="tvm.relay.nn.leaky_relu"><code class="xref py py-obj docutils literal notranslate"><span class="pre">leaky_relu</span></code></a>(data, alpha)</p></td>
 <td><p>This operator takes data as input and does Leaky version of a Rectified Linear Unit.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.log_softmax" title="tvm.relay.nn.log_softmax"><code class="xref py py-obj docutils literal notranslate"><span class="pre">log_softmax</span></code></a>(data[, axis])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.log_softmax" title="tvm.relay.nn.log_softmax"><code class="xref py py-obj docutils literal notranslate"><span class="pre">log_softmax</span></code></a>(data[, axis])</p></td>
 <td><p>Computes log softmax.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.lrn" title="tvm.relay.nn.lrn"><code class="xref py py-obj docutils literal notranslate"><span class="pre">lrn</span></code></a>(data[, size, axis, bias, alpha, beta])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.lrn" title="tvm.relay.nn.lrn"><code class="xref py py-obj docutils literal notranslate"><span class="pre">lrn</span></code></a>(data[, size, axis, bias, alpha, beta])</p></td>
 <td><p>This operator takes data as input and does local response normalization.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool1d" title="tvm.relay.nn.max_pool1d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool1d</span></code></a>(data[, pool_size, strides, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool1d" title="tvm.relay.nn.max_pool1d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool1d</span></code></a>(data[, pool_size, strides, …])</p></td>
 <td><p>1D maximum pooling operator.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool2d" title="tvm.relay.nn.max_pool2d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool2d</span></code></a>(data[, pool_size, strides, …])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool2d" title="tvm.relay.nn.max_pool2d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool2d</span></code></a>(data[, pool_size, strides, …])</p></td>
 <td><p>2D maximum pooling operator.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool2d_grad" title="tvm.relay.nn.max_pool2d_grad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool2d_grad</span></code></a>(out_grad, data[, pool_size, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool2d_grad" title="tvm.relay.nn.max_pool2d_grad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool2d_grad</span></code></a>(out_grad, data[, pool_size, …])</p></td>
 <td><p>Gradient of 2D maximum pooling operator.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool3d" title="tvm.relay.nn.max_pool3d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool3d</span></code></a>(data[, pool_size, strides, …])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.max_pool3d" title="tvm.relay.nn.max_pool3d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">max_pool3d</span></code></a>(data[, pool_size, strides, …])</p></td>
 <td><p>3D maximum pooling operator.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.mirror_pad" title="tvm.relay.nn.mirror_pad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mirror_pad</span></code></a>(data, pad_width[, mode])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.mirror_pad" title="tvm.relay.nn.mirror_pad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">mirror_pad</span></code></a>(data, pad_width[, mode])</p></td>
 <td><p>MirrorPadding</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.pad" title="tvm.relay.nn.pad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">pad</span></code></a>(data, pad_width[, pad_value, pad_mode])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.pad" title="tvm.relay.nn.pad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">pad</span></code></a>(data, pad_width[, pad_value, pad_mode])</p></td>
 <td><p>Padding</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.prelu" title="tvm.relay.nn.prelu"><code class="xref py py-obj docutils literal notranslate"><span class="pre">prelu</span></code></a>(data, alpha[, axis])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.prelu" title="tvm.relay.nn.prelu"><code class="xref py py-obj docutils literal notranslate"><span class="pre">prelu</span></code></a>(data, alpha[, axis])</p></td>
 <td><p>This operator takes data as input and does Leaky version of a Rectified Linear Unit.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.relu" title="tvm.relay.nn.relu"><code class="xref py py-obj docutils literal notranslate"><span class="pre">relu</span></code></a>(data)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.relu" title="tvm.relay.nn.relu"><code class="xref py py-obj docutils literal notranslate"><span class="pre">relu</span></code></a>(data)</p></td>
 <td><p>Rectified linear unit.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.softmax" title="tvm.relay.nn.softmax"><code class="xref py py-obj docutils literal notranslate"><span class="pre">softmax</span></code></a>(data[, axis])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.softmax" title="tvm.relay.nn.softmax"><code class="xref py py-obj docutils literal notranslate"><span class="pre">softmax</span></code></a>(data[, axis])</p></td>
 <td><p>Computes softmax.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.space_to_depth" title="tvm.relay.nn.space_to_depth"><code class="xref py py-obj docutils literal notranslate"><span class="pre">space_to_depth</span></code></a>(data, block_size[, layout])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.space_to_depth" title="tvm.relay.nn.space_to_depth"><code class="xref py py-obj docutils literal notranslate"><span class="pre">space_to_depth</span></code></a>(data, block_size[, layout])</p></td>
 <td><p>Convert spatial blocks into channels.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.sparse_dense" title="tvm.relay.nn.sparse_dense"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sparse_dense</span></code></a>(data, weight)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.sparse_dense" title="tvm.relay.nn.sparse_dense"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sparse_dense</span></code></a>(data, weight)</p></td>
 <td><p>Computes the matrix multiplication of <cite>data</cite> and <cite>weight</cite>, where <cite>data</cite> is a dense matrix and <cite>weight</cite> is a sparse (either BSR or CSR) namedtuple with fields <cite>data</cite>, <cite>indices</cite>, and <cite>indptr</cite>.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.sparse_transpose" title="tvm.relay.nn.sparse_transpose"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sparse_transpose</span></code></a>(x)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.sparse_transpose" title="tvm.relay.nn.sparse_transpose"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sparse_transpose</span></code></a>(x)</p></td>
 <td><p>Computes the fast matrix transpose of x, where x is a sparse tensor in CSR format (represented as a namedtuple with fields <cite>data</cite>, <cite>indices</cite>, and <cite>indptr</cite>).</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.upsampling" title="tvm.relay.nn.upsampling"><code class="xref py py-obj docutils literal notranslate"><span class="pre">upsampling</span></code></a>(data[, scale_h, scale_w, layout, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.upsampling" title="tvm.relay.nn.upsampling"><code class="xref py py-obj docutils literal notranslate"><span class="pre">upsampling</span></code></a>(data[, scale_h, scale_w, layout, …])</p></td>
 <td><p>Upsampling.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.nn.upsampling3d" title="tvm.relay.nn.upsampling3d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">upsampling3d</span></code></a>(data[, scale_d, scale_h, …])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.nn.upsampling3d" title="tvm.relay.nn.upsampling3d"><code class="xref py py-obj docutils literal notranslate"><span class="pre">upsampling3d</span></code></a>(data[, scale_d, scale_h, …])</p></td>
 <td><p>3D Upsampling.</p></td>
 </tr>
 </tbody>
@@ -1604,6 +1607,50 @@ to produce an output Tensor with the following rule:</p>
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.relay.nn.group_norm">
+<code class="sig-prename descclassname">tvm.relay.nn.</code><code class="sig-name descname">group_norm</code><span class="sig-paren">(</span><em class="sig-param">data</em>, <em class="sig-param">gamma</em>, <em class="sig-param">beta</em>, <em class="sig-param">num_groups</em>, <em class="sig-param">axis=1</em>, <em class="sig-param">epsilon=1e-05</em>, <em class="sig-param">center=True</em>, <em class="sig-param">scale=True</em><span class="sig-paren">)</span><a class="headerlink" href [...]
+<dd><p>Group normalization normalizes over a group of channels for each training example.
+We can say that, Group Norm is in between Instance Norm and Layer Norm. When we put
+all the channels into a single group, group normalization becomes Layer normalization.
+And, when we put each channel into different groups it becomes Instance normalization</p>
+<p><a class="reference external" href="https://arxiv.org/pdf/1803.08494.pdf">https://arxiv.org/pdf/1803.08494.pdf</a></p>
+<p>Applies group normalization to the n-dimensional input array by separating the input channels
+into ‘num_groups’ groups, each containing ‘num_channels / num_groups’ channels.
+The mean and standard-deviation are calculated separately over each group. gamma and
+beta are learnable per-channel affine transform parameter vectors of size num_channels.</p>
+<div class="math notranslate nohighlight">
+\[out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis)+\epsilon}}
+    * gamma + beta\]</div>
+<p>Unlike batch normalization, the mean and var are computed along a group of channels.</p>
+<p>If the input has size k on axis 1, then both gamma and beta have shape (k,).</p>
+<div class="admonition note">
+<p class="admonition-title">Note</p>
+<p>This operator can be optimized away for inference.</p>
+</div>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>data</strong> (<em>tvm.relay.Expr</em>) – Input to which group_norm will be applied.</p></li>
+<li><p><strong>gamma</strong> (<em>tvm.relay.Expr</em>) – The gamma scale factor.</p></li>
+<li><p><strong>beta</strong> (<em>tvm.relay.Expr</em>) – The beta offset factor.</p></li>
+<li><p><strong>num_groups</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The number of groups to separate the channels into.</p></li>
+<li><p><strong>axis</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a><em>, </em><em>optional</em><em>, </em><em>default=1</em>) – The axis of the channels.</p></li>
+<li><p><strong>epsilon</strong> (<em>double</em><em>, </em><em>optional</em><em>, </em><em>default=1e-5</em>) – Small float added to variance to avoid dividing by zero.</p></li>
+<li><p><strong>center</strong> (<em>boolean</em><em>, </em><em>optional</em><em>, </em><em>default=True</em>) – If True, add offset of beta to normalized tensor. If False,
+beta is ignored.</p></li>
+<li><p><strong>scale</strong> (<em>boolean</em><em>, </em><em>optional</em><em>, </em><em>default=True</em>) – If True, multiply by gamma. If False, gamma is not used.</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>result</strong> – The normalized data.</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p>tvm.relay.Expr</p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.relay.nn.instance_norm">
 <code class="sig-prename descclassname">tvm.relay.nn.</code><code class="sig-name descname">instance_norm</code><span class="sig-paren">(</span><em class="sig-param">data</em>, <em class="sig-param">gamma</em>, <em class="sig-param">beta</em>, <em class="sig-param">axis=1</em>, <em class="sig-param">epsilon=1e-05</em>, <em class="sig-param">center=True</em>, <em class="sig-param">scale=True</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.nn.instance_norm" title [...]
 <dd><p>Instance Normalization (Ulyanov and et al., 2016)
diff --git a/docs/api/python/relay/transform.html b/docs/api/python/relay/transform.html
index 5aea54d..dcb5525 100644
--- a/docs/api/python/relay/transform.html
+++ b/docs/api/python/relay/transform.html
@@ -245,60 +245,66 @@
 <tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.DeadCodeElimination" title="tvm.relay.transform.DeadCodeElimination"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeadCodeElimination</span></code></a>([inline_once])</p></td>
 <td><p>Remove expressions that do not have any users (dead code).</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.EliminateCommonSubexpr" title="tvm.relay.transform.EliminateCommonSubexpr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">EliminateCommonSubexpr</span></code></a>([fskip])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.DenseToSparse" title="tvm.relay.transform.DenseToSparse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DenseToSparse</span></code></a>(weight_name, weight_shape)</p></td>
+<td><p>Rewrite qualified <code class="docutils literal notranslate"><span class="pre">`nn.dense</span> <span class="pre">operation`</span></code> to <code class="docutils literal notranslate"><span class="pre">`nn.sparse_dense`</span></code></p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.EliminateCommonSubexpr" title="tvm.relay.transform.EliminateCommonSubexpr"><code class="xref py py-obj docutils literal notranslate"><span class="pre">EliminateCommonSubexpr</span></code></a>([fskip])</p></td>
 <td><p>Eliminate common subexpressions.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.EtaExpand" title="tvm.relay.transform.EtaExpand"><code class="xref py py-obj docutils literal notranslate"><span class="pre">EtaExpand</span></code></a>([expand_constructor, …])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.EtaExpand" title="tvm.relay.transform.EtaExpand"><code class="xref py py-obj docutils literal notranslate"><span class="pre">EtaExpand</span></code></a>([expand_constructor, …])</p></td>
 <td><p>Add abstraction over a constructor or global variable bound to a function</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.FastMath" title="tvm.relay.transform.FastMath"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FastMath</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.FastMath" title="tvm.relay.transform.FastMath"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FastMath</span></code></a>()</p></td>
 <td><p>Converts the expensive non linear functions to their fast but approximate counterparts.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.FoldConstant" title="tvm.relay.transform.FoldConstant"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FoldConstant</span></code></a>()</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.FoldConstant" title="tvm.relay.transform.FoldConstant"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FoldConstant</span></code></a>()</p></td>
 <td><p>Fold the constant expressions in a Relay program.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.FoldScaleAxis" title="tvm.relay.transform.FoldScaleAxis"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FoldScaleAxis</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.FoldScaleAxis" title="tvm.relay.transform.FoldScaleAxis"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FoldScaleAxis</span></code></a>()</p></td>
 <td><p>Fold the scaling of axis into weights of conv2d/dense.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.ForwardFoldScaleAxis" title="tvm.relay.transform.ForwardFoldScaleAxis"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ForwardFoldScaleAxis</span></code></a>()</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.ForwardFoldScaleAxis" title="tvm.relay.transform.ForwardFoldScaleAxis"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ForwardFoldScaleAxis</span></code></a>()</p></td>
 <td><p>Fold the scaling of axis into weights of conv2d/dense.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.FuseOps" title="tvm.relay.transform.FuseOps"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FuseOps</span></code></a>([fuse_opt_level])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.FuseOps" title="tvm.relay.transform.FuseOps"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FuseOps</span></code></a>([fuse_opt_level])</p></td>
 <td><p>Fuse operators in an expr to a larger operator according to some rules.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.InferType" title="tvm.relay.transform.InferType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InferType</span></code></a>()</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.InferType" title="tvm.relay.transform.InferType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InferType</span></code></a>()</p></td>
 <td><p>Infer the type of an expr.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.Inline" title="tvm.relay.transform.Inline"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Inline</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.Inline" title="tvm.relay.transform.Inline"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Inline</span></code></a>()</p></td>
 <td><p>Perform inlining on the given Relay IR module.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.LambdaLift" title="tvm.relay.transform.LambdaLift"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LambdaLift</span></code></a>()</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.LambdaLift" title="tvm.relay.transform.LambdaLift"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LambdaLift</span></code></a>()</p></td>
 <td><p>Lift the closure to global function.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.LazyGradientInit" title="tvm.relay.transform.LazyGradientInit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LazyGradientInit</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.LazyGradientInit" title="tvm.relay.transform.LazyGradientInit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LazyGradientInit</span></code></a>()</p></td>
 <td><p>Reduces memory usage of gradient tensors</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.Legalize" title="tvm.relay.transform.Legalize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Legalize</span></code></a>([legalize_map_attr_name])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.Legalize" title="tvm.relay.transform.Legalize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Legalize</span></code></a>([legalize_map_attr_name])</p></td>
 <td><p>Legalizes an expression with another expression.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.MergeCompilerRegions" title="tvm.relay.transform.MergeCompilerRegions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MergeCompilerRegions</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.MergeCompilerRegions" title="tvm.relay.transform.MergeCompilerRegions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MergeCompilerRegions</span></code></a>()</p></td>
 <td><p>Merge together compiler regions.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.MergeComposite" title="tvm.relay.transform.MergeComposite"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MergeComposite</span></code></a>(pattern_table)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.MergeComposite" title="tvm.relay.transform.MergeComposite"><code class="xref py py-obj docutils literal notranslate"><span class="pre">MergeComposite</span></code></a>(pattern_table)</p></td>
 <td><p>Merge multiple operators into a single composite relay function.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.PartialEvaluate" title="tvm.relay.transform.PartialEvaluate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PartialEvaluate</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.PartialEvaluate" title="tvm.relay.transform.PartialEvaluate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PartialEvaluate</span></code></a>()</p></td>
 <td><p>Evaluate the static fragment of the code.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.PartitionGraph" title="tvm.relay.transform.PartitionGraph"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PartitionGraph</span></code></a>()</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.PartitionGraph" title="tvm.relay.transform.PartitionGraph"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PartitionGraph</span></code></a>()</p></td>
 <td><p>Partition a Relay program into regions that can be executed on different backends.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.RemoveUnusedFunctions" title="tvm.relay.transform.RemoveUnusedFunctions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RemoveUnusedFunctions</span></code></a>([entry_functions])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.RemoveUnusedFunctions" title="tvm.relay.transform.RemoveUnusedFunctions"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RemoveUnusedFunctions</span></code></a>([entry_functions])</p></td>
 <td><p>Remove unused global relay functions in a relay module.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.RewriteAnnotatedOps" title="tvm.relay.transform.RewriteAnnotatedOps"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RewriteAnnotatedOps</span></code></a>(fallback_device)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.RewriteAnnotatedOps" title="tvm.relay.transform.RewriteAnnotatedOps"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RewriteAnnotatedOps</span></code></a>(fallback_device)</p></td>
 <td><p>Rewrite the annotated program where annotation operators, e.g.</p></td>
 </tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.relay.transform.SimplifyFCTranspose" title="tvm.relay.transform.SimplifyFCTranspose"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SimplifyFCTranspose</span></code></a>(target_weight_name)</p></td>
+<td><p>Rewrite <code class="docutils literal notranslate"><span class="pre">`y</span> <span class="pre">=</span> <span class="pre">nn.dense(x,</span> <span class="pre">transpose(w,</span> <span class="pre">[1,</span> <span class="pre">0]))`</span></code> to <code class="docutils literal notranslate"><span class="pre">`y</span> <span class="pre">=</span> <span class="pre">nn.dense(x,</span> <span class="pre">wt)`</span></code></p></td>
+</tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#tvm.relay.transform.SimplifyInference" title="tvm.relay.transform.SimplifyInference"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SimplifyInference</span></code></a>()</p></td>
 <td><p>Simplify the data-flow graph for inference phase.</p></td>
 </tr>
@@ -531,6 +537,28 @@ layout using the InferCorrectLayout infrastructure.</p>
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.relay.transform.DenseToSparse">
+<code class="sig-prename descclassname">tvm.relay.transform.</code><code class="sig-name descname">DenseToSparse</code><span class="sig-paren">(</span><em class="sig-param">weight_name</em>, <em class="sig-param">weight_shape</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.transform.DenseToSparse" title="Permalink to this definition">¶</a></dt>
+<dd><p>Rewrite qualified <code class="docutils literal notranslate"><span class="pre">`nn.dense</span> <span class="pre">operation`</span></code> to <code class="docutils literal notranslate"><span class="pre">`nn.sparse_dense`</span></code>
+This pass is used in <code class="docutils literal notranslate"><span class="pre">`data_dep_optimization.bsr_dense`</span></code>
+Parameters of this pass are generated by <code class="docutils literal notranslate"><span class="pre">`analysis.sparse_dense.process_params`</span></code></p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>weight_name</strong> (<a class="reference internal" href="../ir.html#tvm.ir.Array" title="tvm.ir.Array"><em>Array</em></a><em>[</em><a class="reference internal" href="../runtime.html#tvm.runtime.String" title="tvm.runtime.String"><em>String</em></a><em>]</em>) – Names of weights which satisfy the sparse constraints</p></li>
+<li><p><strong>weight_shape</strong> (<a class="reference internal" href="../ir.html#tvm.ir.Array" title="tvm.ir.Array"><em>Array</em></a><em>[</em><a class="reference internal" href="../ir.html#tvm.ir.Array" title="tvm.ir.Array"><em>Array</em></a><em>[</em><a class="reference internal" href="../tir.html#tvm.tir.IntImm" title="tvm.tir.IntImm"><em>IntImm</em></a><em>]</em><em>]</em>) – Weights shape in BSR format.</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>ret</strong> – The registered DenseToSparse pass.</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="../ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.relay.transform.EliminateCommonSubexpr">
 <code class="sig-prename descclassname">tvm.relay.transform.</code><code class="sig-name descname">EliminateCommonSubexpr</code><span class="sig-paren">(</span><em class="sig-param">fskip=None</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.transform.EliminateCommonSubexpr" title="Permalink to this definition">¶</a></dt>
 <dd><p>Eliminate common subexpressions.</p>
@@ -855,6 +883,25 @@ operators with no annotated device.</p>
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.relay.transform.SimplifyFCTranspose">
+<code class="sig-prename descclassname">tvm.relay.transform.</code><code class="sig-name descname">SimplifyFCTranspose</code><span class="sig-paren">(</span><em class="sig-param">target_weight_name</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.transform.SimplifyFCTranspose" title="Permalink to this definition">¶</a></dt>
+<dd><p>Rewrite <code class="docutils literal notranslate"><span class="pre">`y</span> <span class="pre">=</span> <span class="pre">nn.dense(x,</span> <span class="pre">transpose(w,</span> <span class="pre">[1,</span> <span class="pre">0]))`</span></code> to <code class="docutils literal notranslate"><span class="pre">`y</span> <span class="pre">=</span> <span class="pre">nn.dense(x,</span> <span class="pre">wt)`</span></code>
+This pass is used in <code class="docutils literal notranslate"><span class="pre">`data_dep_optimization.simplify_fc_transpose`</span></code></p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><p><strong>target_weight_name</strong> (<a class="reference internal" href="../ir.html#tvm.ir.Array" title="tvm.ir.Array"><em>Array</em></a><em>[</em><a class="reference internal" href="../runtime.html#tvm.runtime.String" title="tvm.runtime.String"><em>String</em></a><em>]</em>) – Names of weights which match the pattern <code class="docutils literal notranslate"><span class="pre">`y</span> <span class="pre">=</span> <span class="pre">nn.dense(x,</span> <span class="pre">transpose [...]
+This parameter is generated by <code class="docutils literal notranslate"><span class="pre">`analysis.search_fc_transpose`</span></code> function</p>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>ret</strong> – The registered SimplifyFCTranspose pass.</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="../ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.relay.transform.SimplifyInference">
 <code class="sig-prename descclassname">tvm.relay.transform.</code><code class="sig-name descname">SimplifyInference</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.relay.transform.SimplifyInference" title="Permalink to this definition">¶</a></dt>
 <dd><p>Simplify the data-flow graph for inference phase. An simplified expression
diff --git a/docs/api/python/te.html b/docs/api/python/te.html
index 06e3697..5f61395 100644
--- a/docs/api/python/te.html
+++ b/docs/api/python/te.html
@@ -2426,35 +2426,15 @@ HalideIR.</p>
 <tr class="row-even"><td><p><a class="reference internal" href="#tvm.te.hybrid.decorate" title="tvm.te.hybrid.decorate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">decorate</span></code></a>(func, fwrapped)</p></td>
 <td><p>A wrapper call of decorator package, differs to call time</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.te.hybrid.form_body" title="tvm.te.hybrid.form_body"><code class="xref py py-obj docutils literal notranslate"><span class="pre">form_body</span></code></a>(sch)</p></td>
-<td><p>According to the given schedule, form the raw body</p></td>
-</tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.te.hybrid.script" title="tvm.te.hybrid.script"><code class="xref py py-obj docutils literal notranslate"><span class="pre">script</span></code></a>(pyfunc)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.te.hybrid.script" title="tvm.te.hybrid.script"><code class="xref py py-obj docutils literal notranslate"><span class="pre">script</span></code></a>(pyfunc)</p></td>
 <td><p>Decorate a python function function as hybrid script.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.te.hybrid.source_to_op" title="tvm.te.hybrid.source_to_op"><code class="xref py py-obj docutils literal notranslate"><span class="pre">source_to_op</span></code></a>(src, args, symbols, closure_vars)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.te.hybrid.source_to_op" title="tvm.te.hybrid.source_to_op"><code class="xref py py-obj docutils literal notranslate"><span class="pre">source_to_op</span></code></a>(src, args, symbols, closure_vars)</p></td>
 <td><p>Another level of wrapper</p></td>
 </tr>
 </tbody>
 </table>
 <dl class="function">
-<dt id="tvm.te.hybrid.form_body">
-<code class="sig-prename descclassname">tvm.te.hybrid.</code><code class="sig-name descname">form_body</code><span class="sig-paren">(</span><em class="sig-param">sch</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.te.hybrid.form_body" title="Permalink to this definition">¶</a></dt>
-<dd><p>According to the given schedule, form the raw body
-:param sch:
-:type sch: tvm.te.schedule.Schedule
-:param The given scheduler to form the raw body:</p>
-<dl class="field-list simple">
-<dt class="field-odd">Returns</dt>
-<dd class="field-odd"><p></p>
-</dd>
-<dt class="field-even">Return type</dt>
-<dd class="field-even"><p>The body formed according to the given schedule</p>
-</dd>
-</dl>
-</dd></dl>
-
-<dl class="function">
 <dt id="tvm.te.hybrid.decorate">
 <code class="sig-prename descclassname">tvm.te.hybrid.</code><code class="sig-name descname">decorate</code><span class="sig-paren">(</span><em class="sig-param">func</em>, <em class="sig-param">fwrapped</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.te.hybrid.decorate" title="Permalink to this definition">¶</a></dt>
 <dd><p>A wrapper call of decorator package, differs to call time</p>
diff --git a/docs/api/python/tir.html b/docs/api/python/tir.html
index f502402..109bc4d 100644
--- a/docs/api/python/tir.html
+++ b/docs/api/python/tir.html
@@ -248,136 +248,139 @@
 <tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.BufferLoad" title="tvm.tir.BufferLoad"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BufferLoad</span></code></a>(buffer, indices)</p></td>
 <td><p>Buffer load node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.BufferStore" title="tvm.tir.BufferStore"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BufferStore</span></code></a>(buffer, value, indices)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.BufferRealize" title="tvm.tir.BufferRealize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BufferRealize</span></code></a>(buffer, bounds, condition, body)</p></td>
+<td><p>Buffer realize node.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.BufferStore" title="tvm.tir.BufferStore"><code class="xref py py-obj docutils literal notranslate"><span class="pre">BufferStore</span></code></a>(buffer, value, indices)</p></td>
 <td><p>Buffer store node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Call" title="tvm.tir.Call"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Call</span></code></a>(dtype, name, args, call_type, func, …)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Call" title="tvm.tir.Call"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Call</span></code></a>(dtype, name, args, call_type, func, …)</p></td>
 <td><p>Call node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Cast" title="tvm.tir.Cast"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Cast</span></code></a>(dtype, value)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Cast" title="tvm.tir.Cast"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Cast</span></code></a>(dtype, value)</p></td>
 <td><p>Cast expression.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Div" title="tvm.tir.Div"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Div</span></code></a>(a, b)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Div" title="tvm.tir.Div"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Div</span></code></a>(a, b)</p></td>
 <td><p>Div node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.EQ" title="tvm.tir.EQ"><code class="xref py py-obj docutils literal notranslate"><span class="pre">EQ</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.EQ" title="tvm.tir.EQ"><code class="xref py py-obj docutils literal notranslate"><span class="pre">EQ</span></code></a>(a, b)</p></td>
 <td><p>EQ node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Evaluate" title="tvm.tir.Evaluate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Evaluate</span></code></a>(value)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Evaluate" title="tvm.tir.Evaluate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Evaluate</span></code></a>(value)</p></td>
 <td><p>Evaluate node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.FloatImm" title="tvm.tir.FloatImm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FloatImm</span></code></a>(dtype, value)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.FloatImm" title="tvm.tir.FloatImm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FloatImm</span></code></a>(dtype, value)</p></td>
 <td><p>Float constant.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.FloorDiv" title="tvm.tir.FloorDiv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FloorDiv</span></code></a>(a, b)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.FloorDiv" title="tvm.tir.FloorDiv"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FloorDiv</span></code></a>(a, b)</p></td>
 <td><p>FloorDiv node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.FloorMod" title="tvm.tir.FloorMod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FloorMod</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.FloorMod" title="tvm.tir.FloorMod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">FloorMod</span></code></a>(a, b)</p></td>
 <td><p>FloorMod node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.For" title="tvm.tir.For"><code class="xref py py-obj docutils literal notranslate"><span class="pre">For</span></code></a>(loop_var, min_val, extent, for_type, …)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.For" title="tvm.tir.For"><code class="xref py py-obj docutils literal notranslate"><span class="pre">For</span></code></a>(loop_var, min_val, extent, for_type, …)</p></td>
 <td><p>For node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Free" title="tvm.tir.Free"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Free</span></code></a>(buffer_var)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Free" title="tvm.tir.Free"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Free</span></code></a>(buffer_var)</p></td>
 <td><p>Free node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.GE" title="tvm.tir.GE"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GE</span></code></a>(a, b)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.GE" title="tvm.tir.GE"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GE</span></code></a>(a, b)</p></td>
 <td><p>GE node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.GT" title="tvm.tir.GT"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GT</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.GT" title="tvm.tir.GT"><code class="xref py py-obj docutils literal notranslate"><span class="pre">GT</span></code></a>(a, b)</p></td>
 <td><p>GT node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.IfThenElse" title="tvm.tir.IfThenElse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IfThenElse</span></code></a>(condition, then_case, else_case)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.IfThenElse" title="tvm.tir.IfThenElse"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IfThenElse</span></code></a>(condition, then_case, else_case)</p></td>
 <td><p>IfThenElse node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.IntImm" title="tvm.tir.IntImm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IntImm</span></code></a>(dtype, value)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.IntImm" title="tvm.tir.IntImm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IntImm</span></code></a>(dtype, value)</p></td>
 <td><p>Int constant.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.IterVar" title="tvm.tir.IterVar"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IterVar</span></code></a>(dom, var, iter_type[, thread_tag])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.IterVar" title="tvm.tir.IterVar"><code class="xref py py-obj docutils literal notranslate"><span class="pre">IterVar</span></code></a>(dom, var, iter_type[, thread_tag])</p></td>
 <td><p>Represent iteration variable.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.LE" title="tvm.tir.LE"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LE</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.LE" title="tvm.tir.LE"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LE</span></code></a>(a, b)</p></td>
 <td><p>LE node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.LT" title="tvm.tir.LT"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LT</span></code></a>(a, b)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.LT" title="tvm.tir.LT"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LT</span></code></a>(a, b)</p></td>
 <td><p>LT node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Layout" title="tvm.tir.Layout"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Layout</span></code></a></p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Layout" title="tvm.tir.Layout"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Layout</span></code></a></p></td>
 <td><p>Layout is composed of upper cases, lower cases and numbers, where upper case indicates a primal axis and the corresponding lower case with factor size indicates the subordinate axis.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Let" title="tvm.tir.Let"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Let</span></code></a>(var, value, body)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Let" title="tvm.tir.Let"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Let</span></code></a>(var, value, body)</p></td>
 <td><p>Let node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.LetStmt" title="tvm.tir.LetStmt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LetStmt</span></code></a>(var, value, body)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.LetStmt" title="tvm.tir.LetStmt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LetStmt</span></code></a>(var, value, body)</p></td>
 <td><p>LetStmt node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Load" title="tvm.tir.Load"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Load</span></code></a>(dtype, buffer_var, index[, predicate])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Load" title="tvm.tir.Load"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Load</span></code></a>(dtype, buffer_var, index[, predicate])</p></td>
 <td><p>Load node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Max" title="tvm.tir.Max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Max</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Max" title="tvm.tir.Max"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Max</span></code></a>(a, b)</p></td>
 <td><p>Max node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Min" title="tvm.tir.Min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Min</span></code></a>(a, b)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Min" title="tvm.tir.Min"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Min</span></code></a>(a, b)</p></td>
 <td><p>Min node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Mod" title="tvm.tir.Mod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Mod</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Mod" title="tvm.tir.Mod"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Mod</span></code></a>(a, b)</p></td>
 <td><p>Mod node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Mul" title="tvm.tir.Mul"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Mul</span></code></a>(a, b)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Mul" title="tvm.tir.Mul"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Mul</span></code></a>(a, b)</p></td>
 <td><p>Mul node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.NE" title="tvm.tir.NE"><code class="xref py py-obj docutils literal notranslate"><span class="pre">NE</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.NE" title="tvm.tir.NE"><code class="xref py py-obj docutils literal notranslate"><span class="pre">NE</span></code></a>(a, b)</p></td>
 <td><p>NE node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Not" title="tvm.tir.Not"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Not</span></code></a>(a)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Not" title="tvm.tir.Not"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Not</span></code></a>(a)</p></td>
 <td><p>Not node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Or" title="tvm.tir.Or"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Or</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Or" title="tvm.tir.Or"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Or</span></code></a>(a, b)</p></td>
 <td><p>Or node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Prefetch" title="tvm.tir.Prefetch"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Prefetch</span></code></a>(func, value_index, dtype, bounds)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Prefetch" title="tvm.tir.Prefetch"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Prefetch</span></code></a>(buffer, bounds)</p></td>
 <td><p>Prefetch node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.PrimFunc" title="tvm.tir.PrimFunc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PrimFunc</span></code></a>(params, body[, ret_type, …])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.PrimFunc" title="tvm.tir.PrimFunc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">PrimFunc</span></code></a>(params, body[, ret_type, …])</p></td>
 <td><p>A function declaration expression.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Provide" title="tvm.tir.Provide"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Provide</span></code></a>(func, value_index, value, args)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Provide" title="tvm.tir.Provide"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Provide</span></code></a>(func, value_index, value, args)</p></td>
 <td><p>Provide node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Ramp" title="tvm.tir.Ramp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Ramp</span></code></a>(base, stride, lanes)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Ramp" title="tvm.tir.Ramp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Ramp</span></code></a>(base, stride, lanes)</p></td>
 <td><p>Ramp node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Realize" title="tvm.tir.Realize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Realize</span></code></a>(func, value_index, dtype, bounds, …)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Realize" title="tvm.tir.Realize"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Realize</span></code></a>(func, value_index, dtype, bounds, …)</p></td>
 <td><p>Realize node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Reduce" title="tvm.tir.Reduce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Reduce</span></code></a>(combiner, src, rdom, condition, …)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Reduce" title="tvm.tir.Reduce"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Reduce</span></code></a>(combiner, src, rdom, condition, …)</p></td>
 <td><p>Reduce node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Select" title="tvm.tir.Select"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Select</span></code></a>(condition, true_value, false_value)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Select" title="tvm.tir.Select"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Select</span></code></a>(condition, true_value, false_value)</p></td>
 <td><p>Select node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.SeqStmt" title="tvm.tir.SeqStmt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SeqStmt</span></code></a>(seq)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.SeqStmt" title="tvm.tir.SeqStmt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SeqStmt</span></code></a>(seq)</p></td>
 <td><p>Sequence of statements.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Shuffle" title="tvm.tir.Shuffle"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Shuffle</span></code></a>(vectors, indices)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Shuffle" title="tvm.tir.Shuffle"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Shuffle</span></code></a>(vectors, indices)</p></td>
 <td><p>Shuffle node.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.SizeVar" title="tvm.tir.SizeVar"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SizeVar</span></code></a>(name, dtype)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.SizeVar" title="tvm.tir.SizeVar"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SizeVar</span></code></a>(name, dtype)</p></td>
 <td><p>Symbolic variable to represent a tensor index size    which is greater or equal to zero.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Stmt" title="tvm.tir.Stmt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Stmt</span></code></a></p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Stmt" title="tvm.tir.Stmt"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Stmt</span></code></a></p></td>
 <td><p>Base class of all the statements.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Store" title="tvm.tir.Store"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Store</span></code></a>(buffer_var, value, index[, predicate])</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Store" title="tvm.tir.Store"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Store</span></code></a>(buffer_var, value, index[, predicate])</p></td>
 <td><p>Store node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.StringImm" title="tvm.tir.StringImm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StringImm</span></code></a>(value)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.StringImm" title="tvm.tir.StringImm"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StringImm</span></code></a>(value)</p></td>
 <td><p>String constant.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Sub" title="tvm.tir.Sub"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Sub</span></code></a>(a, b)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Sub" title="tvm.tir.Sub"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Sub</span></code></a>(a, b)</p></td>
 <td><p>Sub node.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.Var" title="tvm.tir.Var"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Var</span></code></a>(name, dtype)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.Var" title="tvm.tir.Var"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Var</span></code></a>(name, dtype)</p></td>
 <td><p>Symbolic variable.</p></td>
 </tr>
 </tbody>
@@ -1565,6 +1568,22 @@ the correct branch.</p>
 </dd></dl>
 
 <dl class="class">
+<dt id="tvm.tir.BufferRealize">
+<em class="property">class </em><code class="sig-prename descclassname">tvm.tir.</code><code class="sig-name descname">BufferRealize</code><span class="sig-paren">(</span><em class="sig-param">buffer</em>, <em class="sig-param">bounds</em>, <em class="sig-param">condition</em>, <em class="sig-param">body</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.BufferRealize" title="Permalink to this definition">¶</a></dt>
+<dd><p>Buffer realize node.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>buffer</strong> (<a class="reference internal" href="#tvm.tir.Buffer" title="tvm.tir.Buffer"><em>Buffer</em></a>) – The buffer.</p></li>
+<li><p><strong>bounds</strong> (<em>List</em><em>[</em><a class="reference internal" href="ir.html#tvm.ir.Range" title="tvm.ir.Range"><em>Range</em></a><em>]</em>) – The bounds to be realized.</p></li>
+<li><p><strong>condition</strong> (<a class="reference internal" href="ir.html#tvm.ir.PrimExpr" title="tvm.ir.PrimExpr"><em>PrimExpr</em></a>) – The realize condition.</p></li>
+<li><p><strong>body</strong> (<a class="reference internal" href="#tvm.tir.Stmt" title="tvm.tir.Stmt"><em>Stmt</em></a>) – The body of the statement.</p></li>
+</ul>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="class">
 <dt id="tvm.tir.Store">
 <em class="property">class </em><code class="sig-prename descclassname">tvm.tir.</code><code class="sig-name descname">Store</code><span class="sig-paren">(</span><em class="sig-param">buffer_var</em>, <em class="sig-param">value</em>, <em class="sig-param">index</em>, <em class="sig-param">predicate=None</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.Store" title="Permalink to this definition">¶</a></dt>
 <dd><p>Store node.</p>
@@ -1697,14 +1716,12 @@ the correct branch.</p>
 
 <dl class="class">
 <dt id="tvm.tir.Prefetch">
-<em class="property">class </em><code class="sig-prename descclassname">tvm.tir.</code><code class="sig-name descname">Prefetch</code><span class="sig-paren">(</span><em class="sig-param">func</em>, <em class="sig-param">value_index</em>, <em class="sig-param">dtype</em>, <em class="sig-param">bounds</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.Prefetch" title="Permalink to this definition">¶</a></dt>
+<em class="property">class </em><code class="sig-prename descclassname">tvm.tir.</code><code class="sig-name descname">Prefetch</code><span class="sig-paren">(</span><em class="sig-param">buffer</em>, <em class="sig-param">bounds</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.Prefetch" title="Permalink to this definition">¶</a></dt>
 <dd><p>Prefetch node.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
 <dd class="field-odd"><ul class="simple">
-<li><p><strong>func</strong> (<em>Operation</em>) – The operation to create the function.</p></li>
-<li><p><strong>value_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The output value index</p></li>
-<li><p><strong>dtype</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The data type to be prefetched.</p></li>
+<li><p><strong>buffer</strong> (<a class="reference internal" href="#tvm.tir.Buffer" title="tvm.tir.Buffer"><em>Buffer</em></a>) – The buffer to be prefetched.</p></li>
 <li><p><strong>bounds</strong> (<em>list of Range</em>) – The bounds to be prefetched.</p></li>
 </ul>
 </dd>
@@ -2979,15 +2996,39 @@ specified axis;</p></li>
 <tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.Apply" title="tvm.tir.transform.Apply"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Apply</span></code></a>(ftransform)</p></td>
 <td><p>Apply ftransform to each function in the Module.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.CombineContextCall" title="tvm.tir.transform.CombineContextCall"><code class="xref py py-obj docutils literal notranslate"><span class="pre">CombineContextCall</span></code></a>()</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.CoProcSync" title="tvm.tir.transform.CoProcSync"><code class="xref py py-obj docutils literal notranslate"><span class="pre">CoProcSync</span></code></a>()</p></td>
+<td><p>Detect and insert sync points to co-processor.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.CombineContextCall" title="tvm.tir.transform.CombineContextCall"><code class="xref py py-obj docutils literal notranslate"><span class="pre">CombineContextCall</span></code></a>()</p></td>
 <td><p>Combine context calls in the host function.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.Filter" title="tvm.tir.transform.Filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Filter</span></code></a>(fcond)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.Filter" title="tvm.tir.transform.Filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Filter</span></code></a>(fcond)</p></td>
 <td><p>Filter functions by the calling convention attribute.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.InferFragment" title="tvm.tir.transform.InferFragment"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InferFragment</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.InferFragment" title="tvm.tir.transform.InferFragment"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InferFragment</span></code></a>()</p></td>
 <td><p>Infer the TensorCore fragment infomation using tensor intrinsics.</p></td>
 </tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.InjectCopyIntrin" title="tvm.tir.transform.InjectCopyIntrin"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InjectCopyIntrin</span></code></a>(pragma_key, fintrin)</p></td>
+<td><p>Inject copy intrinsics with optional pad.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.InjectDoubleBuffer" title="tvm.tir.transform.InjectDoubleBuffer"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InjectDoubleBuffer</span></code></a>(split_loop_factor)</p></td>
+<td><p>Inject double buffer statements.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.InjectPrefetch" title="tvm.tir.transform.InjectPrefetch"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InjectPrefetch</span></code></a>()</p></td>
+<td><p>Inject prefetch instructions into stmt.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.InjectVirtualThread" title="tvm.tir.transform.InjectVirtualThread"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InjectVirtualThread</span></code></a>()</p></td>
+<td><p>Inject virtual thread loops.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.InstrumentBoundCheckers" title="tvm.tir.transform.InstrumentBoundCheckers"><code class="xref py py-obj docutils literal notranslate"><span class="pre">InstrumentBoundCheckers</span></code></a>()</p></td>
+<td><p>Instruments bound checkers.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.LiftAttrScope" title="tvm.tir.transform.LiftAttrScope"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LiftAttrScope</span></code></a>(attr_key)</p></td>
+<td><p>Lift common attrs with attr_key to outer scope.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.LoopPartition" title="tvm.tir.transform.LoopPartition"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LoopPartition</span></code></a>(split_const_loop)</p></td>
+<td><p>Partition loops in the stmt.</p></td>
+</tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.LowerCustomDatatypes" title="tvm.tir.transform.LowerCustomDatatypes"><code class="xref py py-obj docutils literal notranslate"><span class="pre">LowerCustomDatatypes</span></code></a>()</p></td>
 <td><p>Lower custom datatypes.</p></td>
 </tr>
@@ -3012,16 +3053,37 @@ specified axis;</p></li>
 <tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.NarrowDataType" title="tvm.tir.transform.NarrowDataType"><code class="xref py py-obj docutils literal notranslate"><span class="pre">NarrowDataType</span></code></a>(target_bits)</p></td>
 <td><p>Narrow down PrimExpr datatype in stmt to target_bits.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.SkipAssert" title="tvm.tir.transform.SkipAssert"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SkipAssert</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.RemoveNoOp" title="tvm.tir.transform.RemoveNoOp"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RemoveNoOp</span></code></a>()</p></td>
+<td><p>Remove No Op from the Stmt.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.RewriteUnsafeSelect" title="tvm.tir.transform.RewriteUnsafeSelect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">RewriteUnsafeSelect</span></code></a>()</p></td>
+<td><p>Detect and rewrite unsafe select that contains memory access.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.Simplify" title="tvm.tir.transform.Simplify"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Simplify</span></code></a>()</p></td>
+<td><p>Run arithmetic simplifications on the statements and expressions.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.SkipAssert" title="tvm.tir.transform.SkipAssert"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SkipAssert</span></code></a>()</p></td>
 <td><p>Skip assert stmt.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.SplitHostDevice" title="tvm.tir.transform.SplitHostDevice"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SplitHostDevice</span></code></a>()</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.SplitHostDevice" title="tvm.tir.transform.SplitHostDevice"><code class="xref py py-obj docutils literal notranslate"><span class="pre">SplitHostDevice</span></code></a>()</p></td>
 <td><p>Split the function into a host function and device functions.</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.ThreadSync" title="tvm.tir.transform.ThreadSync"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ThreadSync</span></code></a>(storage_scope)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.StorageFlatten" title="tvm.tir.transform.StorageFlatten"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageFlatten</span></code></a>(cache_line_size[, …])</p></td>
+<td><p>Flatten the multi-dimensional read/write to 1D.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.StorageRewrite" title="tvm.tir.transform.StorageRewrite"><code class="xref py py-obj docutils literal notranslate"><span class="pre">StorageRewrite</span></code></a>()</p></td>
+<td><p>Rewrite storage allocation pattern.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.ThreadSync" title="tvm.tir.transform.ThreadSync"><code class="xref py py-obj docutils literal notranslate"><span class="pre">ThreadSync</span></code></a>(storage_scope)</p></td>
 <td><p>Insert sync between parallel read/write of shared buffers.</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.prim_func_pass" title="tvm.tir.transform.prim_func_pass"><code class="xref py py-obj docutils literal notranslate"><span class="pre">prim_func_pass</span></code></a>([pass_func, opt_level, name, …])</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.UnrollLoop" title="tvm.tir.transform.UnrollLoop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">UnrollLoop</span></code></a>(auto_max_step, auto_max_depth, …)</p></td>
+<td><p>Unroll the constant loop marked by unroll.</p></td>
+</tr>
+<tr class="row-even"><td><p><a class="reference internal" href="#tvm.tir.transform.VectorizeLoop" title="tvm.tir.transform.VectorizeLoop"><code class="xref py py-obj docutils literal notranslate"><span class="pre">VectorizeLoop</span></code></a>([enable_vectorize])</p></td>
+<td><p>Lower vectorization loops.</p></td>
+</tr>
+<tr class="row-odd"><td><p><a class="reference internal" href="#tvm.tir.transform.prim_func_pass" title="tvm.tir.transform.prim_func_pass"><code class="xref py py-obj docutils literal notranslate"><span class="pre">prim_func_pass</span></code></a>([pass_func, opt_level, name, …])</p></td>
 <td><p>Decorate a function pass.</p></td>
 </tr>
 </tbody>
@@ -3124,6 +3186,20 @@ pass class should be created through py:func:<cite>tvm.tir.transform.function_pa
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.tir.transform.CoProcSync">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">CoProcSync</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.CoProcSync" title="Permalink to this definition">¶</a></dt>
+<dd><p>Detect and insert sync points to co-processor.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.tir.transform.CombineContextCall">
 <code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">CombineContextCall</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.CombineContextCall" title="Permalink to this definition">¶</a></dt>
 <dd><p>Combine context calls in the host function.</p>
@@ -3169,6 +3245,119 @@ pass class should be created through py:func:<cite>tvm.tir.transform.function_pa
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.tir.transform.InjectCopyIntrin">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">InjectCopyIntrin</code><span class="sig-paren">(</span><em class="sig-param">pragma_key</em>, <em class="sig-param">fintrin</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.InjectCopyIntrin" title="Permalink to this definition">¶</a></dt>
+<dd><p>Inject copy intrinsics with optional pad.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>pragma_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The pragma key for hint of copy.</p></li>
+<li><p><strong>fintrin</strong> (<em>function</em>) – The function with signature copyintrin(src, dst, pad_before, pad_after, pad_value)</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.InjectDoubleBuffer">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">InjectDoubleBuffer</code><span class="sig-paren">(</span><em class="sig-param">split_loop_factor</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.InjectDoubleBuffer" title="Permalink to this definition">¶</a></dt>
+<dd><p>Inject double buffer statements.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><p><strong>split_loop_factor</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – Loop splitting factor.</p>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.InjectPrefetch">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">InjectPrefetch</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.InjectPrefetch" title="Permalink to this definition">¶</a></dt>
+<dd><p>Inject prefetch instructions into stmt.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.InjectVirtualThread">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">InjectVirtualThread</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.InjectVirtualThread" title="Permalink to this definition">¶</a></dt>
+<dd><p>Inject virtual thread loops.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.InstrumentBoundCheckers">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">InstrumentBoundCheckers</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.InstrumentBoundCheckers" title="Permalink to this definition">¶</a></dt>
+<dd><p>Instruments bound checkers.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.LiftAttrScope">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">LiftAttrScope</code><span class="sig-paren">(</span><em class="sig-param">attr_key</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.LiftAttrScope" title="Permalink to this definition">¶</a></dt>
+<dd><p>Lift common attrs with attr_key to outer scope.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><p><strong>attr_key</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.8)"><em>str</em></a>) – The attribute key to be checked.</p>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.LoopPartition">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">LoopPartition</code><span class="sig-paren">(</span><em class="sig-param">split_const_loop</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.LoopPartition" title="Permalink to this definition">¶</a></dt>
+<dd><p>Partition loops in the stmt.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><p><strong>split_const_loop</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – Flag to enable partition for const loop.</p>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.tir.transform.LowerCustomDatatypes">
 <code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">LowerCustomDatatypes</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.LowerCustomDatatypes" title="Permalink to this definition">¶</a></dt>
 <dd><p>Lower custom datatypes.</p>
@@ -3297,6 +3486,48 @@ following the PackedFunc input signature.</p>
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.tir.transform.RemoveNoOp">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">RemoveNoOp</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.RemoveNoOp" title="Permalink to this definition">¶</a></dt>
+<dd><p>Remove No Op from the Stmt.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.RewriteUnsafeSelect">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">RewriteUnsafeSelect</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.RewriteUnsafeSelect" title="Permalink to this definition">¶</a></dt>
+<dd><p>Detect and rewrite unsafe select that contains memory access.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.Simplify">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">Simplify</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.Simplify" title="Permalink to this definition">¶</a></dt>
+<dd><p>Run arithmetic simplifications on the statements and expressions.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.tir.transform.SkipAssert">
 <code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">SkipAssert</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.SkipAssert" title="Permalink to this definition">¶</a></dt>
 <dd><p>Skip assert stmt.</p>
@@ -3325,6 +3556,43 @@ following the PackedFunc input signature.</p>
 </dd></dl>
 
 <dl class="function">
+<dt id="tvm.tir.transform.StorageFlatten">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">StorageFlatten</code><span class="sig-paren">(</span><em class="sig-param">cache_line_size</em>, <em class="sig-param">create_bound_attribute=False</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.StorageFlatten" title="Permalink to this definition">¶</a></dt>
+<dd><p>Flatten the multi-dimensional read/write to 1D.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>cache_line_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The size of CPU cache line.</p></li>
+<li><p><strong>create_bound_attribute</strong> – Whether to create bound attributes.</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.StorageRewrite">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">StorageRewrite</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.StorageRewrite" title="Permalink to this definition">¶</a></dt>
+<dd><p>Rewrite storage allocation pattern.</p>
+<p>Moves each allocation to the outermost possible scope
+and tries to share space between allocations to make
+a static allocation plan when possible.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Returns</dt>
+<dd class="field-odd"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-even">Return type</dt>
+<dd class="field-even"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
 <dt id="tvm.tir.transform.ThreadSync">
 <code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">ThreadSync</code><span class="sig-paren">(</span><em class="sig-param">storage_scope</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.ThreadSync" title="Permalink to this definition">¶</a></dt>
 <dd><p>Insert sync between parallel read/write of shared buffers.</p>
@@ -3341,6 +3609,52 @@ following the PackedFunc input signature.</p>
 </dl>
 </dd></dl>
 
+<dl class="function">
+<dt id="tvm.tir.transform.UnrollLoop">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">UnrollLoop</code><span class="sig-paren">(</span><em class="sig-param">auto_max_step</em>, <em class="sig-param">auto_max_depth</em>, <em class="sig-param">auto_max_extent</em>, <em class="sig-param">explicit_unroll</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.UnrollLoop" title="Permalink to this definition">¶</a></dt>
+<dd><p>Unroll the constant loop marked by unroll.</p>
+<p>This pass also automatically attaches the pragma unroll tag to loops that meet the standard.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>auto_max_step</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – The maximum step before stop attach automatic unroll</p></li>
+<li><p><strong>auto_max_depth</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.8)"><em>int</em></a>) – <p>The maximum depth before stop attach automatic unroll</p>
+<dl class="simple">
+<dt>auto_max_extent<span class="classifier">int</span></dt><dd><p>The maximum extent of the loop we can unroll.
+This is a legacy option that does not take the loop's total steps into account.</p>
+</dd>
+</dl>
+</p></li>
+<li><p><strong>explicit_unroll</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – Whether explicitly unroll the loop, or leave unroll annotation to codegen.</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
+<dl class="function">
+<dt id="tvm.tir.transform.VectorizeLoop">
+<code class="sig-prename descclassname">tvm.tir.transform.</code><code class="sig-name descname">VectorizeLoop</code><span class="sig-paren">(</span><em class="sig-param">enable_vectorize=True</em><span class="sig-paren">)</span><a class="headerlink" href="#tvm.tir.transform.VectorizeLoop" title="Permalink to this definition">¶</a></dt>
+<dd><p>Lower vectorization loops.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><p><strong>enable_vectorize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – Whether vectorization is enabled.
+Will lower to scalar loop when it is turned off.</p>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>fpass</strong> – The result pass</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="ir.html#tvm.transform.Pass" title="tvm.transform.Pass">tvm.transform.Pass</a></p>
+</dd>
+</dl>
+</dd></dl>
+
 </div>
 <div class="section" id="module-tvm.tir.analysis">
 <span id="tvm-tir-analysis"></span><h1>tvm.tir.analysis<a class="headerlink" href="#module-tvm.tir.analysis" title="Permalink to this headline">¶</a></h1>
diff --git a/docs/api/python/topi.html b/docs/api/python/topi.html
index 5fe0d9e..5827db4 100644
--- a/docs/api/python/topi.html
+++ b/docs/api/python/topi.html
@@ -457,46 +457,49 @@ specific workload.</p>
 <tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.logical_not</span></code>(data)</p></td>
 <td><p>Compute element-wise logical not of data.</p></td>
 </tr>
-<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.arange</span></code>(start[, stop, step, dtype])</p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.logical_xor</span></code>(lhs, rhs)</p></td>
+<td><p>Compute element-wise logical xor of data.</p></td>
+</tr>
+<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.arange</span></code>(start[, stop, step, dtype])</p></td>
 <td><p>Creates a tensor with evenly spaced values within a given interval.</p></td>
 </tr>
-<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.stack</span></code>(a, axis)</p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.stack</span></code>(a, axis)</p></td>
 <td><p>Repeats the whole array multiple times.</p></td>
 </tr>
-<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.repeat</span></code>(a, repeats, axis)</p></td>
+<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.repeat</span></code>(a, repeats, axis)</p></td>
 <td><p>Repeats elements of an array.</p></td>
 </tr>
-<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.tile</span></code>(a, reps)</p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.tile</span></code>(a, reps)</p></td>
 <td><p>Repeats the whole array multiple times.</p></td>
 </tr>
-<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.shape</span></code>(array[, dtype])</p></td>
+<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.shape</span></code>(array[, dtype])</p></td>
 <td><p>Get the shape of input array</p></td>
 </tr>
-<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.ndarray_size</span></code>(array[, dtype])</p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.ndarray_size</span></code>(array[, dtype])</p></td>
 <td><p>Get the number of elements of input array</p></td>
 </tr>
-<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.layout_transform</span></code>(array, src_layout, …)</p></td>
+<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.layout_transform</span></code>(array, src_layout, …)</p></td>
 <td><p>Transform the layout according to src_layout and dst_layout</p></td>
 </tr>
-<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.image.resize</span></code>(data, size[, layout, …])</p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.image.resize</span></code>(data, size[, layout, …])</p></td>
 <td><p>Perform resize operation on the data.</p></td>
 </tr>
-<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.image.crop_and_resize</span></code>(data, boxes, …)</p></td>
+<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.image.crop_and_resize</span></code>(data, boxes, …)</p></td>
 <td><p>Perform crop and resize operation on the data.</p></td>
 </tr>
-<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.image.dilation2d</span></code></p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.image.dilation2d</span></code></p></td>
 <td><p>Dilation2D operators</p></td>
 </tr>
-<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.argsort</span></code>(data[, valid_count, axis, …])</p></td>
+<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.argsort</span></code>(data[, valid_count, axis, …])</p></td>
 <td><p>Performs sorting along the given axis and returns an array of indices having the same shape as an input array that index data in sorted order.</p></td>
 </tr>
-<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.topk</span></code>(data[, k, axis, ret_type, …])</p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.topk</span></code>(data[, k, axis, ret_type, …])</p></td>
 <td><p>Get the top k elements in an input tensor along the given axis.</p></td>
 </tr>
-<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.sequence_mask</span></code>(data, valid_length[, …])</p></td>
+<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.sequence_mask</span></code>(data, valid_length[, …])</p></td>
 <td><p>Sets all elements outside the expected length of the sequence to a constant value.</p></td>
 </tr>
-<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.one_hot</span></code>(indices, on_value, off_value, …)</p></td>
+<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">topi.one_hot</span></code>(indices, on_value, off_value, …)</p></td>
+<td><p>Returns a one-hot tensor where the locations represented by indices take value on_value, other locations take value off_value.</p></td>
 </tr>
 </tbody>
@@ -1917,6 +1920,27 @@ Otherwise returns Tensor.</p>
 </dl>
 </dd></dl>
 
+<dl class="function">
+<dt id="topi.logical_xor">
+<code class="sig-prename descclassname">topi.</code><code class="sig-name descname">logical_xor</code><span class="sig-paren">(</span><em class="sig-param">lhs</em>, <em class="sig-param">rhs</em><span class="sig-paren">)</span><a class="headerlink" href="#topi.logical_xor" title="Permalink to this definition">¶</a></dt>
+<dd><p>Compute element-wise logical xor of data.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>lhs</strong> (<a class="reference internal" href="te.html#tvm.te.Tensor" title="tvm.te.Tensor"><em>tvm.te.Tensor</em></a><em> or </em><em>Expr</em>) – The left operand</p></li>
+<li><p><strong>rhs</strong> (<a class="reference internal" href="te.html#tvm.te.Tensor" title="tvm.te.Tensor"><em>tvm.te.Tensor</em></a><em> or </em><em>Expr</em>) – The right operand</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>ret</strong> – Returns Expr if both operands are Expr.
+Otherwise returns Tensor.</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="te.html#tvm.te.Tensor" title="tvm.te.Tensor">tvm.te.Tensor</a> or Expr</p>
+</dd>
+</dl>
+</dd></dl>
+
 </div>
 <div class="section" id="topi-nn">
 <h2>topi.nn<a class="headerlink" href="#topi-nn" title="Permalink to this headline">¶</a></h2>
diff --git a/docs/doxygen/analyzer_8h_source.html b/docs/doxygen/analyzer_8h_source.html
index 610af65..5e066ee 100644
--- a/docs/doxygen/analyzer_8h_source.html
+++ b/docs/doxygen/analyzer_8h_source.html
@@ -124,7 +124,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1arith_1_1IntSetAnalyzer_html"><div class="ttname"><a href="classtvm_1_1arith_1_1IntSetAnalyzer.html">tvm::arith::IntSetAnalyzer</a></div><div class="ttdoc">Integer set analyzer. </div><div class="ttdef"><b>Definition:</b> analyzer.h:356</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_a923d1bb833c984008772782e90cda37a"><div class="ttname"><a href="namespacetvm_1_1tir.html#a923d1bb833c984008772782e90cda37a">tvm::tir::Simplify</a></div><div class="ttdeci">PrimExpr Simplify(PrimExpr expr, Map&lt; Var, Range &gt; vrange=Map&lt; Var, Range &gt;())</div><div class="ttdoc">Simplify the expression. </div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1ConstIntBoundNode_html_a0761897bf16ab73b848bf360e9b195a3"><div class="ttname"><a href="classtvm_1_1arith_1_1ConstIntBoundNode.html#a0761897bf16ab73b848bf360e9b195a3">tvm::arith::ConstIntBoundNode::min_value</a></div><div class="ttdeci">int64_t min_value</div><div class="ttdef"><b>Definition:</b> analyzer.h:63</div></div>
-<div class="ttc" id="namespacetopi_html_ae10e1fb8c765a9ae15c12f45199375b3"><div class="ttname"><a href="namespacetopi.html#ae10e1fb8c765a9ae15c12f45199375b3">topi::equal</a></div><div class="ttdeci">tvm::PrimExpr equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:395</div></div>
+<div class="ttc" id="namespacetopi_html_ae10e1fb8c765a9ae15c12f45199375b3"><div class="ttname"><a href="namespacetopi.html#ae10e1fb8c765a9ae15c12f45199375b3">topi::equal</a></div><div class="ttdeci">tvm::PrimExpr equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:408</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1Analyzer_html_a435eba3ac3a839d3c53b74acfdc10146"><div class="ttname"><a href="classtvm_1_1arith_1_1Analyzer.html#a435eba3ac3a839d3c53b74acfdc10146">tvm::arith::Analyzer::const_int_bound</a></div><div class="ttdeci">ConstIntBoundAnalyzer const_int_bound</div><div class="ttdoc">sub-analyzer: const integer bound </div><div class="ttdef"><b>Definition:</b> analyzer.h:395</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1ConstIntBoundNode_html_a652c9c965a3942f1ca45f7929ddd554c"><div class="ttname"><a href="classtvm_1_1arith_1_1ConstIntBoundNode.html#a652c9c965a3942f1ca45f7929ddd554c">tvm::arith::ConstIntBoundNode::_type_key</a></div><div class="ttdeci">static constexpr const char * _type_key</div><div class="ttdef"><b>Definition:</b> analyzer.h:83</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1RewriteSimplifier_html"><div class="ttname"><a href="classtvm_1_1arith_1_1RewriteSimplifier.html">tvm::arith::RewriteSimplifier</a></div><div class="ttdoc">Rewrite-rule based simplifier. </div><div class="ttdef"><b>Definition:</b> analyzer.h:248</div></div>
diff --git a/docs/doxygen/annotated.html b/docs/doxygen/annotated.html
index a9b67bc..5dc5538 100644
--- a/docs/doxygen/annotated.html
+++ b/docs/doxygen/annotated.html
@@ -205,103 +205,104 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <tr id="row_2_2_53_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" target="_self">FunctionNode</a></td><td class="desc">Relay <a class="el" href="classtvm_1_1relay_1_1Function.html" title="Managed reference to FunctionNode. ">Function</a> container </td></tr>
 <tr id="row_2_2_54_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html" target="_self">GetValidCountsAttrs</a></td><td class="desc">Attributes used in get_valid_counts operator </td></tr>
 <tr id="row_2_2_55_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GlobalPool2DAttrs.html" target="_self">GlobalPool2DAttrs</a></td><td class="desc">Attributes for global pool operator </td></tr>
-<tr id="row_2_2_56_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Id.html" target="_self">Id</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_57_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IdNode.html" target="_self">IdNode</a></td><td class="desc">The unique identifier of variables </td></tr>
-<tr id="row_2_2_58_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1If.html" target="_self">If</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_59_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfNode.html" target="_self">IfNode</a></td><td class="desc">Container of <a class="el" href="classtvm_1_1relay_1_1If.html">If</a> </td></tr>
-<tr id="row_2_2_60_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html" target="_self">InitOpAttrs</a></td><td class="desc">Attributes that specify a tensor </td></tr>
-<tr id="row_2_2_61_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html" target="_self">InstanceNormAttrs</a></td><td class="desc">Attributes used in instance_norm operator </td></tr>
-<tr id="row_2_2_62_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html" target="_self">InterpreterClosure</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_63_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html" target="_self">InterpreterClosureObj</a></td><td class="desc">The container type of Closures used by the interpreter </td></tr>
-<tr id="row_2_2_64_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html" target="_self">L2NormalizeAttrs</a></td><td class="desc">Attributes for L2Normalize operator </td></tr>
-<tr id="row_2_2_65_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayerNormAttrs.html" target="_self">LayerNormAttrs</a></td><td class="desc">Attributes used in layer_norm operator </td></tr>
-<tr id="row_2_2_66_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html" target="_self">LayoutTransformAttrs</a></td><td class="desc">Attributes for LayoutTransform operator </td></tr>
-<tr id="row_2_2_67_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html" target="_self">LeakyReluAttrs</a></td><td class="desc">Attributes for leaky relu operator </td></tr>
-<tr id="row_2_2_68_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Let.html" target="_self">Let</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_69_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetNode.html" target="_self">LetNode</a></td><td class="desc">A binding of a sub-network </td></tr>
-<tr id="row_2_2_70_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html" target="_self">LRNAttrs</a></td><td class="desc">Attributes for LRN operator </td></tr>
-<tr id="row_2_2_71_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Match.html" target="_self">Match</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_72_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MatchNode.html" target="_self">MatchNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Match.html">Match</a> container node </td></tr>
-<tr id="row_2_2_73_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html" target="_self">MaxPool1DAttrs</a></td><td class="desc">Attributes for 1D max pool operator </td></tr>
-<tr id="row_2_2_74_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html" target="_self">MaxPool2DAttrs</a></td><td class="desc">Attributes for max pool operator </td></tr>
-<tr id="row_2_2_75_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html" target="_self">MaxPool3DAttrs</a></td><td class="desc">Attributes for 3D max pool operator </td></tr>
-<tr id="row_2_2_76_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html" target="_self">MirrorPadAttrs</a></td><td class="desc">Attributes used for the MirrorPadding operator </td></tr>
-<tr id="row_2_2_77_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html" target="_self">MixedModeMutator</a></td><td class="desc">Non-recursive DFS Graph Traversal for Custom Rewriting Passes </td></tr>
-<tr id="row_2_2_78_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html" target="_self">MixedModeVisitor</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html" title="A simple visitor wrapper around ExprFunctor. Recursively visit the content. ">ExprVisitor</a> which traverses the [...]
-<tr id="row_2_2_79_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html" target="_self">MultiBoxPriorAttrs</a></td><td class="desc">Attributes used in multibox_prior operators </td></tr>
-<tr id="row_2_2_80_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxTransformLocAttrs.html" target="_self">MultiBoxTransformLocAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_81_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html" target="_self">NdarraySizeAttrs</a></td><td class="desc">Attributes for ndarray_size operator </td></tr>
-<tr id="row_2_2_82_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html" target="_self">NonMaximumSuppressionAttrs</a></td><td class="desc">Attributes used in non_maximum_suppression operator </td></tr>
-<tr id="row_2_2_83_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html" target="_self">OnDeviceAttrs</a></td><td class="desc">Options for the device annotation operators </td></tr>
-<tr id="row_2_2_84_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html" target="_self">OneHotAttrs</a></td><td class="desc">Attributes used in one-hot operator </td></tr>
-<tr id="row_2_2_85_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html" target="_self">OpImplementation</a></td><td class="desc">Operator implementation class </td></tr>
-<tr id="row_2_2_86_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html" target="_self">OpImplementationNode</a></td><td class="desc">Operator implementation that includes compute and schedule function </td></tr>
-<tr id="row_2_2_87_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html" target="_self">OpSpecialization</a></td><td class="desc">Operator specialization class </td></tr>
-<tr id="row_2_2_88_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html" target="_self">OpSpecializationNode</a></td><td class="desc">Specialized implementations for operators under certain conditions </td></tr>
-<tr id="row_2_2_89_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html" target="_self">OpStrategy</a></td><td class="desc">Operator strategy class </td></tr>
-<tr id="row_2_2_90_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html" target="_self">OpStrategyNode</a></td><td class="desc">Operator strategy to choose implementation </td></tr>
-<tr id="row_2_2_91_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html" target="_self">PadAttrs</a></td><td class="desc">Attributes used for the padding operator </td></tr>
-<tr id="row_2_2_92_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Pattern.html" target="_self">Pattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> is the base type for an ADT match pattern in Relay </td></tr>
-<tr id="row_2_2_93_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html" target="_self">PatternConstructor</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_94_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html" target="_self">PatternConstructorNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
-<tr id="row_2_2_95_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" target="_self">PatternFunctor</a></td><td class="desc">A dynamical functor on ADT patterns that dispatches on its first argument. You can use this as a more powerful visitor, since it allows you to define the types of further arguments to VisitPattern </td></tr>
-<tr id="row_2_2_96_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html" target="_self">PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_97_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html" target="_self">PatternMutator</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">ExprFunctor [...]
-<tr id="row_2_2_98_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternNode.html" target="_self">PatternNode</a></td><td class="desc">Base type for declaring relay pattern </td></tr>
-<tr id="row_2_2_99_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html" target="_self">PatternTuple</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_100_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTupleNode.html" target="_self">PatternTupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
-<tr id="row_2_2_101_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVar.html" target="_self">PatternVar</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_102_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVarNode.html" target="_self">PatternVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
-<tr id="row_2_2_103_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html" target="_self">PatternVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" title="A dynamical functor on ADT patterns that dispatches on its first argument. You can use this as a m [...]
-<tr id="row_2_2_104_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html" target="_self">PatternWildcard</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_105_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcardNode.html" target="_self">PatternWildcardNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html">PatternWildcard</a> container node </td></tr>
-<tr id="row_2_2_106_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PReluAttrs.html" target="_self">PReluAttrs</a></td><td class="desc">Attributes for prelu operator </td></tr>
-<tr id="row_2_2_107_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ProposalAttrs.html" target="_self">ProposalAttrs</a></td><td class="desc">Attributes used in proposal operators </td></tr>
-<tr id="row_2_2_108_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosure.html" target="_self">RecClosure</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_109_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosureObj.html" target="_self">RecClosureObj</a></td><td class="desc">The container type of <a class="el" href="classtvm_1_1relay_1_1RecClosure.html">RecClosure</a> </td></tr>
-<tr id="row_2_2_110_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReduceAttrs.html" target="_self">ReduceAttrs</a></td><td class="desc">Attributes for Reduce operators </td></tr>
-<tr id="row_2_2_111_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreate.html" target="_self">RefCreate</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_112_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreateNode.html" target="_self">RefCreateNode</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_113_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefRead.html" target="_self">RefRead</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_114_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefReadNode.html" target="_self">RefReadNode</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_115_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefValue.html" target="_self">RefValue</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_116_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RefValueObj.html" target="_self">RefValueObj</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_117_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWrite.html" target="_self">RefWrite</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_118_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWriteNode.html" target="_self">RefWriteNode</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_119_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RelayNode.html" target="_self">RelayNode</a></td><td class="desc">This is the base node container of all relay structures </td></tr>
-<tr id="row_2_2_120_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RepeatAttrs.html" target="_self">RepeatAttrs</a></td><td class="desc">Attributes used in repeat operators </td></tr>
-<tr id="row_2_2_121_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeAttrs.html" target="_self">ReshapeAttrs</a></td><td class="desc">Attributes used in reshape operators </td></tr>
-<tr id="row_2_2_122_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ResizeAttrs.html" target="_self">ResizeAttrs</a></td><td class="desc">Attributes used in image resize operator </td></tr>
-<tr id="row_2_2_123_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReverseAttrs.html" target="_self">ReverseAttrs</a></td><td class="desc">Attributes used in reverse operators </td></tr>
-<tr id="row_2_2_124_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIAlignAttrs.html" target="_self">ROIAlignAttrs</a></td><td class="desc">Attributes used in roi_align operators </td></tr>
-<tr id="row_2_2_125_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIPoolAttrs.html" target="_self">ROIPoolAttrs</a></td><td class="desc">Attributes used in roi_pool operators </td></tr>
-<tr id="row_2_2_126_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SequenceMaskAttrs.html" target="_self">SequenceMaskAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_127_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeFuncAttrs.html" target="_self">ShapeFuncAttrs</a></td><td class="desc">Options for the shape function operator </td></tr>
-<tr id="row_2_2_128_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeOfAttrs.html" target="_self">ShapeOfAttrs</a></td><td class="desc">Attributes for ShapeOf operator </td></tr>
-<tr id="row_2_2_129_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SliceLikeAttrs.html" target="_self">SliceLikeAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_130_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SoftmaxAttrs.html" target="_self">SoftmaxAttrs</a></td><td class="desc">Attributes used in softmax operators </td></tr>
-<tr id="row_2_2_131_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseDenseAttrs.html" target="_self">SparseDenseAttrs</a></td><td class="desc">Attributes for sparse_dense operator </td></tr>
-<tr id="row_2_2_132_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseTransposeAttrs.html" target="_self">SparseTransposeAttrs</a></td><td class="desc">Attributes for sparse_transpose operator </td></tr>
-<tr id="row_2_2_133_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SplitAttrs.html" target="_self">SplitAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_134_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SqueezeAttrs.html" target="_self">SqueezeAttrs</a></td><td class="desc">Attributes used in squeeze operators </td></tr>
-<tr id="row_2_2_135_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StackAttrs.html" target="_self">StackAttrs</a></td><td class="desc">Attributes used in stack operators </td></tr>
-<tr id="row_2_2_136_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StridedSliceAttrs.html" target="_self">StridedSliceAttrs</a></td><td class="desc">Attributes for StridedSlice operator </td></tr>
-<tr id="row_2_2_137_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SubPixelAttrs.html" target="_self">SubPixelAttrs</a></td><td class="desc">Attributes used in subpixel operators </td></tr>
-<tr id="row_2_2_138_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TakeAttrs.html" target="_self">TakeAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_139_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExpr.html" target="_self">TempExpr</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_140_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExprNode.html" target="_self">TempExprNode</a></td><td class="desc">Base class of the temporary expression </td></tr>
-<tr id="row_2_2_141_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TileAttrs.html" target="_self">TileAttrs</a></td><td class="desc">Attributes used in tile operators </td></tr>
-<tr id="row_2_2_142_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TopKAttrs.html" target="_self">TopKAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_143_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TransposeAttrs.html" target="_self">TransposeAttrs</a></td><td class="desc">Attributes used in transpose operators </td></tr>
-<tr id="row_2_2_144_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Tuple.html" target="_self">Tuple</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_145_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItem.html" target="_self">TupleGetItem</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_146_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemNode.html" target="_self">TupleGetItemNode</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_147_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleNode.html" target="_self">TupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Tuple.html">Tuple</a> container </td></tr>
-<tr id="row_2_2_148_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSampling3DAttrs.html" target="_self">UpSampling3DAttrs</a></td><td class="desc">Attributes for upsampling3d operator </td></tr>
-<tr id="row_2_2_149_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSamplingAttrs.html" target="_self">UpSamplingAttrs</a></td><td class="desc">Attributes for upsampling operator </td></tr>
-<tr id="row_2_2_150_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Var.html" target="_self">Var</a></td><td class="desc"></td></tr>
-<tr id="row_2_2_151_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarNode.html" target="_self">VarNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Var.html">Var</a> </td></tr>
-<tr id="row_2_2_152_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1YoloReorgAttrs.html" target="_self">YoloReorgAttrs</a></td><td class="desc">Attributes used in yolo reorg operators </td></tr>
+<tr id="row_2_2_56_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GroupNormAttrs.html" target="_self">GroupNormAttrs</a></td><td class="desc">Attributes used in group_norm operator </td></tr>
+<tr id="row_2_2_57_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Id.html" target="_self">Id</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_58_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IdNode.html" target="_self">IdNode</a></td><td class="desc">The unique identifier of variables </td></tr>
+<tr id="row_2_2_59_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1If.html" target="_self">If</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_60_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfNode.html" target="_self">IfNode</a></td><td class="desc">Container of <a class="el" href="classtvm_1_1relay_1_1If.html">If</a> </td></tr>
+<tr id="row_2_2_61_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html" target="_self">InitOpAttrs</a></td><td class="desc">Attributes that specify a tensor </td></tr>
+<tr id="row_2_2_62_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html" target="_self">InstanceNormAttrs</a></td><td class="desc">Attributes used in instance_norm operator </td></tr>
+<tr id="row_2_2_63_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html" target="_self">InterpreterClosure</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_64_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html" target="_self">InterpreterClosureObj</a></td><td class="desc">The container type of Closures used by the interpreter </td></tr>
+<tr id="row_2_2_65_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html" target="_self">L2NormalizeAttrs</a></td><td class="desc">Attributes for L2Normalize operator </td></tr>
+<tr id="row_2_2_66_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayerNormAttrs.html" target="_self">LayerNormAttrs</a></td><td class="desc">Attributes used in layer_norm operator </td></tr>
+<tr id="row_2_2_67_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html" target="_self">LayoutTransformAttrs</a></td><td class="desc">Attributes for LayoutTransform operator </td></tr>
+<tr id="row_2_2_68_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html" target="_self">LeakyReluAttrs</a></td><td class="desc">Attributes for leaky relu operator </td></tr>
+<tr id="row_2_2_69_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Let.html" target="_self">Let</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_70_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetNode.html" target="_self">LetNode</a></td><td class="desc">A binding of a sub-network </td></tr>
+<tr id="row_2_2_71_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html" target="_self">LRNAttrs</a></td><td class="desc">Attributes for LRN operator </td></tr>
+<tr id="row_2_2_72_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Match.html" target="_self">Match</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_73_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MatchNode.html" target="_self">MatchNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Match.html">Match</a> container node </td></tr>
+<tr id="row_2_2_74_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html" target="_self">MaxPool1DAttrs</a></td><td class="desc">Attributes for 1D max pool operator </td></tr>
+<tr id="row_2_2_75_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html" target="_self">MaxPool2DAttrs</a></td><td class="desc">Attributes for max pool operator </td></tr>
+<tr id="row_2_2_76_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html" target="_self">MaxPool3DAttrs</a></td><td class="desc">Attributes for 3D max pool operator </td></tr>
+<tr id="row_2_2_77_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html" target="_self">MirrorPadAttrs</a></td><td class="desc">Attributes used for the MirrorPadding operator </td></tr>
+<tr id="row_2_2_78_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html" target="_self">MixedModeMutator</a></td><td class="desc">Non-recursive DFS Graph Traversal for Custom Rewriting Passes </td></tr>
+<tr id="row_2_2_79_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html" target="_self">MixedModeVisitor</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html" title="A simple visitor wrapper around ExprFunctor. Recursively visit the content. ">ExprVisitor</a> which traverses the [...]
+<tr id="row_2_2_80_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html" target="_self">MultiBoxPriorAttrs</a></td><td class="desc">Attributes used in multibox_prior operators </td></tr>
+<tr id="row_2_2_81_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxTransformLocAttrs.html" target="_self">MultiBoxTransformLocAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_82_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html" target="_self">NdarraySizeAttrs</a></td><td class="desc">Attributes for ndarray_size operator </td></tr>
+<tr id="row_2_2_83_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html" target="_self">NonMaximumSuppressionAttrs</a></td><td class="desc">Attributes used in non_maximum_suppression operator </td></tr>
+<tr id="row_2_2_84_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html" target="_self">OnDeviceAttrs</a></td><td class="desc">Options for the device annotation operators </td></tr>
+<tr id="row_2_2_85_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html" target="_self">OneHotAttrs</a></td><td class="desc">Attributes used in one-hot operator </td></tr>
+<tr id="row_2_2_86_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html" target="_self">OpImplementation</a></td><td class="desc">Operator implementation class </td></tr>
+<tr id="row_2_2_87_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html" target="_self">OpImplementationNode</a></td><td class="desc">Operator implementation that includes compute and schedule function </td></tr>
+<tr id="row_2_2_88_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html" target="_self">OpSpecialization</a></td><td class="desc">Operator specialization class </td></tr>
+<tr id="row_2_2_89_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html" target="_self">OpSpecializationNode</a></td><td class="desc">Specialized implementations for operators under certain conditions </td></tr>
+<tr id="row_2_2_90_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html" target="_self">OpStrategy</a></td><td class="desc">Operator strategy class </td></tr>
+<tr id="row_2_2_91_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html" target="_self">OpStrategyNode</a></td><td class="desc">Operator strategy to choose implementation </td></tr>
+<tr id="row_2_2_92_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html" target="_self">PadAttrs</a></td><td class="desc">Attributes used for the padding operator </td></tr>
+<tr id="row_2_2_93_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Pattern.html" target="_self">Pattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> is the base type for an ADT match pattern in Relay </td></tr>
+<tr id="row_2_2_94_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html" target="_self">PatternConstructor</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_95_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html" target="_self">PatternConstructorNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
+<tr id="row_2_2_96_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" target="_self">PatternFunctor</a></td><td class="desc">A dynamical functor on ADT patterns that dispatches on its first argument. You can use this as a more powerful visitor, since it allows you to define the types of further arguments to VisitPattern </td></tr>
+<tr id="row_2_2_97_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html" target="_self">PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_98_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html" target="_self">PatternMutator</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">ExprFunctor [...]
+<tr id="row_2_2_99_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternNode.html" target="_self">PatternNode</a></td><td class="desc">Base type for declaring relay pattern </td></tr>
+<tr id="row_2_2_100_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html" target="_self">PatternTuple</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_101_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTupleNode.html" target="_self">PatternTupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
+<tr id="row_2_2_102_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVar.html" target="_self">PatternVar</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_103_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVarNode.html" target="_self">PatternVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
+<tr id="row_2_2_104_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html" target="_self">PatternVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" title="A dynamical functor on ADT patterns that dispatches on its first argument. You can use this as a m [...]
+<tr id="row_2_2_105_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html" target="_self">PatternWildcard</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_106_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcardNode.html" target="_self">PatternWildcardNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html">PatternWildcard</a> container node </td></tr>
+<tr id="row_2_2_107_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PReluAttrs.html" target="_self">PReluAttrs</a></td><td class="desc">Attributes for prelu operator </td></tr>
+<tr id="row_2_2_108_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ProposalAttrs.html" target="_self">ProposalAttrs</a></td><td class="desc">Attributes used in proposal operators </td></tr>
+<tr id="row_2_2_109_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosure.html" target="_self">RecClosure</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_110_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosureObj.html" target="_self">RecClosureObj</a></td><td class="desc">The container type of <a class="el" href="classtvm_1_1relay_1_1RecClosure.html">RecClosure</a> </td></tr>
+<tr id="row_2_2_111_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReduceAttrs.html" target="_self">ReduceAttrs</a></td><td class="desc">Attributes for Reduce operators </td></tr>
+<tr id="row_2_2_112_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreate.html" target="_self">RefCreate</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_113_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreateNode.html" target="_self">RefCreateNode</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_114_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefRead.html" target="_self">RefRead</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_115_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefReadNode.html" target="_self">RefReadNode</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_116_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefValue.html" target="_self">RefValue</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_117_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RefValueObj.html" target="_self">RefValueObj</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_118_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWrite.html" target="_self">RefWrite</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_119_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWriteNode.html" target="_self">RefWriteNode</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_120_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RelayNode.html" target="_self">RelayNode</a></td><td class="desc">This is the base node container of all relay structures </td></tr>
+<tr id="row_2_2_121_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RepeatAttrs.html" target="_self">RepeatAttrs</a></td><td class="desc">Attributes used in repeat operators </td></tr>
+<tr id="row_2_2_122_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeAttrs.html" target="_self">ReshapeAttrs</a></td><td class="desc">Attributes used in reshape operators </td></tr>
+<tr id="row_2_2_123_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ResizeAttrs.html" target="_self">ResizeAttrs</a></td><td class="desc">Attributes used in image resize operator </td></tr>
+<tr id="row_2_2_124_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReverseAttrs.html" target="_self">ReverseAttrs</a></td><td class="desc">Attributes used in reverse operators </td></tr>
+<tr id="row_2_2_125_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIAlignAttrs.html" target="_self">ROIAlignAttrs</a></td><td class="desc">Attributes used in roi_align operators </td></tr>
+<tr id="row_2_2_126_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIPoolAttrs.html" target="_self">ROIPoolAttrs</a></td><td class="desc">Attributes used in roi_pool operators </td></tr>
+<tr id="row_2_2_127_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SequenceMaskAttrs.html" target="_self">SequenceMaskAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_128_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeFuncAttrs.html" target="_self">ShapeFuncAttrs</a></td><td class="desc">Options for the shape function operator </td></tr>
+<tr id="row_2_2_129_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeOfAttrs.html" target="_self">ShapeOfAttrs</a></td><td class="desc">Attributes for ShapeOf operator </td></tr>
+<tr id="row_2_2_130_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SliceLikeAttrs.html" target="_self">SliceLikeAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_131_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SoftmaxAttrs.html" target="_self">SoftmaxAttrs</a></td><td class="desc">Attributes used in softmax operators </td></tr>
+<tr id="row_2_2_132_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseDenseAttrs.html" target="_self">SparseDenseAttrs</a></td><td class="desc">Attributes for sparse_dense operator </td></tr>
+<tr id="row_2_2_133_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseTransposeAttrs.html" target="_self">SparseTransposeAttrs</a></td><td class="desc">Attributes for sparse_transpose operator </td></tr>
+<tr id="row_2_2_134_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SplitAttrs.html" target="_self">SplitAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_135_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SqueezeAttrs.html" target="_self">SqueezeAttrs</a></td><td class="desc">Attributes used in squeeze operators </td></tr>
+<tr id="row_2_2_136_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StackAttrs.html" target="_self">StackAttrs</a></td><td class="desc">Attributes used in stack operators </td></tr>
+<tr id="row_2_2_137_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StridedSliceAttrs.html" target="_self">StridedSliceAttrs</a></td><td class="desc">Attributes for StridedSlice operator </td></tr>
+<tr id="row_2_2_138_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SubPixelAttrs.html" target="_self">SubPixelAttrs</a></td><td class="desc">Attributes used in subpixel operators </td></tr>
+<tr id="row_2_2_139_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TakeAttrs.html" target="_self">TakeAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_140_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExpr.html" target="_self">TempExpr</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_141_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExprNode.html" target="_self">TempExprNode</a></td><td class="desc">Base class of the temporary expression </td></tr>
+<tr id="row_2_2_142_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TileAttrs.html" target="_self">TileAttrs</a></td><td class="desc">Attributes used in tile operators </td></tr>
+<tr id="row_2_2_143_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TopKAttrs.html" target="_self">TopKAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_144_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TransposeAttrs.html" target="_self">TransposeAttrs</a></td><td class="desc">Attributes used in transpose operators </td></tr>
+<tr id="row_2_2_145_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Tuple.html" target="_self">Tuple</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_146_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItem.html" target="_self">TupleGetItem</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_147_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemNode.html" target="_self">TupleGetItemNode</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_148_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleNode.html" target="_self">TupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Tuple.html">Tuple</a> container </td></tr>
+<tr id="row_2_2_149_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSampling3DAttrs.html" target="_self">UpSampling3DAttrs</a></td><td class="desc">Attributes for upsampling3d operator </td></tr>
+<tr id="row_2_2_150_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSamplingAttrs.html" target="_self">UpSamplingAttrs</a></td><td class="desc">Attributes for upsampling operator </td></tr>
+<tr id="row_2_2_151_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Var.html" target="_self">Var</a></td><td class="desc"></td></tr>
+<tr id="row_2_2_152_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarNode.html" target="_self">VarNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Var.html">Var</a> </td></tr>
+<tr id="row_2_2_153_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1YoloReorgAttrs.html" target="_self">YoloReorgAttrs</a></td><td class="desc">Attributes used in yolo reorg operators </td></tr>
 <tr id="row_2_3_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_2_3_" class="arrow" onclick="toggleFolder('2_3_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1runtime.html" target="_self">runtime</a></td><td class="desc"></td></tr>
 <tr id="row_2_3_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_2_3_0_" class="arrow" onclick="toggleFolder('2_3_0_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1runtime_1_1threading.html" target="_self">threading</a></td><td class="desc"></td></tr>
 <tr id="row_2_3_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1threading_1_1ThreadGroup.html" target="_self">ThreadGroup</a></td><td class="desc">A platform-agnostic abstraction for managing a collection of thread pool threads </td></tr>
@@ -406,82 +407,85 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <tr id="row_2_5_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html" target="_self">BufferLoad</a></td><td class="desc"></td></tr>
 <tr id="row_2_5_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferLoadNode.html" target="_self">BufferLoadNode</a></td><td class="desc">Load value from the high dimension buffer </td></tr>
 <tr id="row_2_5_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferNode.html" target="_self">BufferNode</a></td><td class="desc">Node to represent a buffer </td></tr>
-<tr id="row_2_5_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferStore.html" target="_self">BufferStore</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" target="_self">BufferStoreNode</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CallNode.html" target="_self">CallNode</a></td><td class="desc">Call node </td></tr>
-<tr id="row_2_5_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CastNode.html" target="_self">CastNode</a></td><td class="desc">Cast value from one data type to another </td></tr>
-<tr id="row_2_5_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CmpOpNode.html" target="_self">CmpOpNode</a></td><td class="desc">Base template to implement comparison ops </td></tr>
-<tr id="row_2_5_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CommReducer.html" target="_self">CommReducer</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html" target="_self">CommReducerNode</a></td><td class="desc">A commutative reducer node to represent a commutative binary operator with identity element </td></tr>
-<tr id="row_2_5_21_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1DivNode.html" target="_self">DivNode</a></td><td class="desc">/ b in the C semnatics </td></tr>
-<tr id="row_2_5_22_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1EQNode.html" target="_self">EQNode</a></td><td class="desc">== b </td></tr>
-<tr id="row_2_5_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html" target="_self">EvaluateNode</a></td><td class="desc">Evaluates an expression. This is mostly used for putting a Call node into <a class="el" href="classtvm_1_1tir_1_1Stmt.html" title="Container of all statements. ">Stmt</a> </td></tr>
-<tr id="row_2_5_24_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1tir_1_1ExprDeepEqual.html" target="_self">ExprDeepEqual</a></td><td class="desc">Compare two expressions recursively and check if they are equal to each other without var remapping </td></tr>
-<tr id="row_2_5_25_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html" target="_self">ExprFunctor</a></td><td class="desc">A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerful Visitor, since it allows you to define function signatures of Visit Function </td></tr>
-<tr id="row_2_5_26_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html" target="_self">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_27_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html" target="_self">ExprMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html" title="ExprMutator that mutates expressions. ">ExprMutator</a> that mutates expressions </td></tr>
-<tr id="row_2_5_28_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html" target="_self">ExprVisitor</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html" title="ExprVisitor. ">ExprVisitor</a> </td></tr>
-<tr id="row_2_5_29_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html" target="_self">FloorDivNode</a></td><td class="desc">Floor division, floor(a/b) </td></tr>
-<tr id="row_2_5_30_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorModNode.html" target="_self">FloorModNode</a></td><td class="desc">The remainder of the floordiv </td></tr>
-<tr id="row_2_5_31_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ForNode.html" target="_self">ForNode</a></td><td class="desc">A for loop, with poissible type annotations </td></tr>
-<tr id="row_2_5_32_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FreeNode.html" target="_self">FreeNode</a></td><td class="desc">Free the resources in the buffer before the scope ends </td></tr>
-<tr id="row_2_5_33_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FunctionBaseNode.html" target="_self">FunctionBaseNode</a></td><td class="desc">Base node of internal functions </td></tr>
-<tr id="row_2_5_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html" target="_self">FunctionRef</a></td><td class="desc">Reference to a function </td></tr>
-<tr id="row_2_5_35_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GENode.html" target="_self">GENode</a></td><td class="desc">&gt;= b </td></tr>
-<tr id="row_2_5_36_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GTNode.html" target="_self">GTNode</a></td><td class="desc">&gt; b </td></tr>
-<tr id="row_2_5_37_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html" target="_self">IfThenElseNode</a></td><td class="desc">IfThenElse statment </td></tr>
-<tr id="row_2_5_38_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IterVar.html" target="_self">IterVar</a></td><td class="desc">Iteration Variable, represents an iteration over an integer interval </td></tr>
-<tr id="row_2_5_39_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IterVarNode.html" target="_self">IterVarNode</a></td><td class="desc">An iteration variable representing an iteration over a one dimensional interval </td></tr>
-<tr id="row_2_5_40_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Layout.html" target="_self">Layout</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1Layout.html" title="Layout is to describe how data is organized within an N-dimention tensor. It is composed of upper cas...">Layout</a> is to describe how data is organized withi [...]
-<tr id="row_2_5_41_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LayoutAxis.html" target="_self">LayoutAxis</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_42_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LayoutNode.html" target="_self">LayoutNode</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_43_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1tir_1_1LENode.html" target="_self">LENode</a></td><td class="desc">&lt;= b </td></tr>
-<tr id="row_2_5_44_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LetNode.html" target="_self">LetNode</a></td><td class="desc">Let binding. Bind var to value then evaluate body </td></tr>
-<tr id="row_2_5_45_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html" target="_self">LetStmtNode</a></td><td class="desc">Let binding, bind var to value, then run body </td></tr>
-<tr id="row_2_5_46_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LoadNode.html" target="_self">LoadNode</a></td><td class="desc">Load the value from buffer_var </td></tr>
-<tr id="row_2_5_47_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LTNode.html" target="_self">LTNode</a></td><td class="desc">&lt; b </td></tr>
-<tr id="row_2_5_48_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MaxNode.html" target="_self">MaxNode</a></td><td class="desc">Max(a, b) </td></tr>
-<tr id="row_2_5_49_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MinNode.html" target="_self">MinNode</a></td><td class="desc">Min(a, b) </td></tr>
-<tr id="row_2_5_50_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ModNode.html" target="_self">ModNode</a></td><td class="desc">% b in the C semnatics </td></tr>
-<tr id="row_2_5_51_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MulNode.html" target="_self">MulNode</a></td><td class="desc"><ul>
+<tr id="row_2_5_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html" target="_self">BufferRealize</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html" title="Annotate the region where the buffer need to be read and write in the body. We only need to allocate ...">BufferR [...]
+<tr id="row_2_5_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html" target="_self">BufferRealizeNode</a></td><td class="desc">Annotate the region where the buffer need to be read and write in the body. We only need to allocate the space for the corresponding region </td></tr>
+<tr id="row_2_5_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferStore.html" target="_self">BufferStore</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" title="Store value to the high dimension buffer. ">BufferStoreNode</a> </td></tr>
+<tr id="row_2_5_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" target="_self">BufferStoreNode</a></td><td class="desc">Store value to the high dimension buffer </td></tr>
+<tr id="row_2_5_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CallNode.html" target="_self">CallNode</a></td><td class="desc">Call node </td></tr>
+<tr id="row_2_5_19_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CastNode.html" target="_self">CastNode</a></td><td class="desc">Cast value from one data type to another </td></tr>
+<tr id="row_2_5_20_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CmpOpNode.html" target="_self">CmpOpNode</a></td><td class="desc">Base template to implement comparison ops </td></tr>
+<tr id="row_2_5_21_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CommReducer.html" target="_self">CommReducer</a></td><td class="desc"></td></tr>
+<tr id="row_2_5_22_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html" target="_self">CommReducerNode</a></td><td class="desc">A commutative reducer node to represent a commutative binary operator with identity element </td></tr>
+<tr id="row_2_5_23_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1DivNode.html" target="_self">DivNode</a></td><td class="desc">/ b in the C semnatics </td></tr>
+<tr id="row_2_5_24_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1EQNode.html" target="_self">EQNode</a></td><td class="desc">== b </td></tr>
+<tr id="row_2_5_25_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html" target="_self">EvaluateNode</a></td><td class="desc">Evaluates an expression. This is mostly used for putting a Call node into <a class="el" href="classtvm_1_1tir_1_1Stmt.html" title="Container of all statements. ">Stmt</a> </td></tr>
+<tr id="row_2_5_26_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1tir_1_1ExprDeepEqual.html" target="_self">ExprDeepEqual</a></td><td class="desc">Compare two expressions recursively and check if they are equal to each other without var remapping </td></tr>
+<tr id="row_2_5_27_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html" target="_self">ExprFunctor</a></td><td class="desc">A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerful Visitor, since it allows you to define function signatures of Visit Function </td></tr>
+<tr id="row_2_5_28_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html" target="_self">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_2_5_29_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html" target="_self">ExprMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html" title="ExprMutator that mutates expressions. ">ExprMutator</a> that mutates expressions </td></tr>
+<tr id="row_2_5_30_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html" target="_self">ExprVisitor</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html" title="ExprVisitor. ">ExprVisitor</a> </td></tr>
+<tr id="row_2_5_31_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html" target="_self">FloorDivNode</a></td><td class="desc">Floor division, floor(a/b) </td></tr>
+<tr id="row_2_5_32_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorModNode.html" target="_self">FloorModNode</a></td><td class="desc">The remainder of the floordiv </td></tr>
+<tr id="row_2_5_33_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ForNode.html" target="_self">ForNode</a></td><td class="desc">A for loop, with poissible type annotations </td></tr>
+<tr id="row_2_5_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FreeNode.html" target="_self">FreeNode</a></td><td class="desc">Free the resources in the buffer before the scope ends </td></tr>
+<tr id="row_2_5_35_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FunctionBaseNode.html" target="_self">FunctionBaseNode</a></td><td class="desc">Base node of internal functions </td></tr>
+<tr id="row_2_5_36_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html" target="_self">FunctionRef</a></td><td class="desc">Reference to a function </td></tr>
+<tr id="row_2_5_37_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GENode.html" target="_self">GENode</a></td><td class="desc">&gt;= b </td></tr>
+<tr id="row_2_5_38_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GTNode.html" target="_self">GTNode</a></td><td class="desc">&gt; b </td></tr>
+<tr id="row_2_5_39_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html" target="_self">IfThenElseNode</a></td><td class="desc">IfThenElse statment </td></tr>
+<tr id="row_2_5_40_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IterVar.html" target="_self">IterVar</a></td><td class="desc">Iteration Variable, represents an iteration over an integer interval </td></tr>
+<tr id="row_2_5_41_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IterVarNode.html" target="_self">IterVarNode</a></td><td class="desc">An iteration variable representing an iteration over a one dimensional interval </td></tr>
+<tr id="row_2_5_42_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Layout.html" target="_self">Layout</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1Layout.html" title="Layout is to describe how data is organized within an N-dimention tensor. It is composed of upper cas...">Layout</a> is to describe how data is organized withi [...]
+<tr id="row_2_5_43_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LayoutAxis.html" target="_self">LayoutAxis</a></td><td class="desc"></td></tr>
+<tr id="row_2_5_44_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LayoutNode.html" target="_self">LayoutNode</a></td><td class="desc"></td></tr>
+<tr id="row_2_5_45_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1tir_1_1LENode.html" target="_self">LENode</a></td><td class="desc">&lt;= b </td></tr>
+<tr id="row_2_5_46_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LetNode.html" target="_self">LetNode</a></td><td class="desc">Let binding. Bind var to value then evaluate body </td></tr>
+<tr id="row_2_5_47_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html" target="_self">LetStmtNode</a></td><td class="desc">Let binding, bind var to value, then run body </td></tr>
+<tr id="row_2_5_48_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LoadNode.html" target="_self">LoadNode</a></td><td class="desc">Load the value from buffer_var </td></tr>
+<tr id="row_2_5_49_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LTNode.html" target="_self">LTNode</a></td><td class="desc">&lt; b </td></tr>
+<tr id="row_2_5_50_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MaxNode.html" target="_self">MaxNode</a></td><td class="desc">Max(a, b) </td></tr>
+<tr id="row_2_5_51_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MinNode.html" target="_self">MinNode</a></td><td class="desc">Min(a, b) </td></tr>
+<tr id="row_2_5_52_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ModNode.html" target="_self">ModNode</a></td><td class="desc">% b in the C semnatics </td></tr>
+<tr id="row_2_5_53_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MulNode.html" target="_self">MulNode</a></td><td class="desc"><ul>
 <li>b </li>
 </ul>
 </td></tr>
-<tr id="row_2_5_52_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1NENode.html" target="_self">NENode</a></td><td class="desc">!= b </td></tr>
-<tr id="row_2_5_53_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1NotNode.html" target="_self">NotNode</a></td><td class="desc">!a </td></tr>
-<tr id="row_2_5_54_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1OrNode.html" target="_self">OrNode</a></td><td class="desc">|| b </td></tr>
-<tr id="row_2_5_55_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" target="_self">PrefetchNode</a></td><td class="desc">A prefetch hint of func </td></tr>
-<tr id="row_2_5_56_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrimFunc.html" target="_self">PrimFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html" title="Primitive functions that contains TIR statements. ">PrimFuncNode</a> </td></tr>
-<tr id="row_2_5_57_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html" target="_self">PrimFuncNode</a></td><td class="desc">Primitive functions that contains TIR statements </td></tr>
-<tr id="row_2_5_58_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProvideNode.html" target="_self">ProvideNode</a></td><td class="desc">Store value into mult-dimensional array defined by func </td></tr>
-<tr id="row_2_5_59_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1RampNode.html" target="_self">RampNode</a></td><td class="desc">Construct a vector with lanes elements where its i-th element equals base + i * stride. This is useful to construct a index for a continuous vector load </td></tr>
-<tr id="row_2_5_60_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html" target="_self">RealizeNode</a></td><td class="desc">Annotate the bounds where func need to be written and read in body. We will need to allocate space for the corresponding regions </td></tr>
-<tr id="row_2_5_61_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ReduceNode.html" target="_self">ReduceNode</a></td><td class="desc">Reduction operator operator </td></tr>
-<tr id="row_2_5_62_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SelectNode.html" target="_self">SelectNode</a></td><td class="desc">Return true_value if condition is true, otherwise return false_value </td></tr>
-<tr id="row_2_5_63_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_2_5_63_" class="arrow" onclick="toggleFolder('2_5_63_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmt.html" target="_self">SeqStmt</a></td><td class="desc">Sequence statement </td></tr>
-<tr id="row_2_5_63_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html" target="_self">Flattener</a></td><td class="desc">Helper class to flatten sequence of arguments into <a class="el" href="classtvm_1_1Array.html" title="Array container of NodeRef in DSL graph. Array implements copy on write semantics, which means ar [...]
-<tr id="row_2_5_64_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmtNode.html" target="_self">SeqStmtNode</a></td><td class="desc">The container of seq statement. Represent a sequence of statements </td></tr>
-<tr id="row_2_5_65_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ShuffleNode.html" target="_self">ShuffleNode</a></td><td class="desc">Shuffle instruction. vec = concat(vectors) result = (vec[indices[0]], vec[indices[1]] ...) </td></tr>
-<tr id="row_2_5_66_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SizeVar.html" target="_self">SizeVar</a></td><td class="desc">Named variable represents a tensor index size </td></tr>
-<tr id="row_2_5_67_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SizeVarNode.html" target="_self">SizeVarNode</a></td><td class="desc">A variable node represent a tensor index size, whose value must be non-negative </td></tr>
-<tr id="row_2_5_68_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Stmt.html" target="_self">Stmt</a></td><td class="desc">Container of all statements </td></tr>
-<tr id="row_2_5_69_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtExprMutator.html" target="_self">StmtExprMutator</a></td><td class="desc">Mutator that recursively mutates stmts and exprs on them </td></tr>
-<tr id="row_2_5_70_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtExprVisitor.html" target="_self">StmtExprVisitor</a></td><td class="desc">Visitor that recursively visit stmts and exprs on them </td></tr>
-<tr id="row_2_5_71_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtFunctor.html" target="_self">StmtFunctor</a></td><td class="desc">Same as <a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">ExprFunctor</a> except it is a [...]
-<tr id="row_2_5_72_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8args_08_4.html" target="_self">StmtFunctor&lt; R(const Stmt &amp;n, Args...args)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_73_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html" target="_self">StmtMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html" title="StmtMutator that mutates the statements. ">StmtMutator</a> that mutates the statements </td></tr>
-<tr id="row_2_5_74_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtNode.html" target="_self">StmtNode</a></td><td class="desc">Base node of all statements </td></tr>
-<tr id="row_2_5_75_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html" target="_self">StmtVisitor</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html" title="StmtVisitor. ">StmtVisitor</a> </td></tr>
-<tr id="row_2_5_76_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StoreNode.html" target="_self">StoreNode</a></td><td class="desc">Store value to the buffer </td></tr>
-<tr id="row_2_5_77_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StringImm.html" target="_self">StringImm</a></td><td class="desc"></td></tr>
-<tr id="row_2_5_78_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StringImmNode.html" target="_self">StringImmNode</a></td><td class="desc">String constants, only used in asserts </td></tr>
-<tr id="row_2_5_79_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SubNode.html" target="_self">SubNode</a></td><td class="desc"><ul>
+<tr id="row_2_5_54_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1NENode.html" target="_self">NENode</a></td><td class="desc">!= b </td></tr>
+<tr id="row_2_5_55_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1NotNode.html" target="_self">NotNode</a></td><td class="desc">!a </td></tr>
+<tr id="row_2_5_56_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1OrNode.html" target="_self">OrNode</a></td><td class="desc">|| b </td></tr>
+<tr id="row_2_5_57_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Prefetch.html" target="_self">Prefetch</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" title="A prefetch hint for abuffer. ">PrefetchNode</a> </td></tr>
+<tr id="row_2_5_58_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" target="_self">PrefetchNode</a></td><td class="desc">A prefetch hint for abuffer </td></tr>
+<tr id="row_2_5_59_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrimFunc.html" target="_self">PrimFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html" title="Primitive functions that contains TIR statements. ">PrimFuncNode</a> </td></tr>
+<tr id="row_2_5_60_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html" target="_self">PrimFuncNode</a></td><td class="desc">Primitive functions that contains TIR statements </td></tr>
+<tr id="row_2_5_61_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProvideNode.html" target="_self">ProvideNode</a></td><td class="desc">Store value into mult-dimensional array defined by func </td></tr>
+<tr id="row_2_5_62_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1RampNode.html" target="_self">RampNode</a></td><td class="desc">Construct a vector with lanes elements where its i-th element equals base + i * stride. This is useful to construct a index for a continuous vector load </td></tr>
+<tr id="row_2_5_63_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html" target="_self">RealizeNode</a></td><td class="desc">Annotate the bounds where func need to be written and read in body. We will need to allocate space for the corresponding regions </td></tr>
+<tr id="row_2_5_64_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ReduceNode.html" target="_self">ReduceNode</a></td><td class="desc">Reduction operator operator </td></tr>
+<tr id="row_2_5_65_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SelectNode.html" target="_self">SelectNode</a></td><td class="desc">Return true_value if condition is true, otherwise return false_value </td></tr>
+<tr id="row_2_5_66_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_2_5_66_" class="arrow" onclick="toggleFolder('2_5_66_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmt.html" target="_self">SeqStmt</a></td><td class="desc">Sequence statement </td></tr>
+<tr id="row_2_5_66_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html" target="_self">Flattener</a></td><td class="desc">Helper class to flatten sequence of arguments into <a class="el" href="classtvm_1_1Array.html" title="Array container of NodeRef in DSL graph. Array implements copy on write semantics, which means ar [...]
+<tr id="row_2_5_67_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmtNode.html" target="_self">SeqStmtNode</a></td><td class="desc">The container of seq statement. Represent a sequence of statements </td></tr>
+<tr id="row_2_5_68_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ShuffleNode.html" target="_self">ShuffleNode</a></td><td class="desc">Shuffle instruction. vec = concat(vectors) result = (vec[indices[0]], vec[indices[1]] ...) </td></tr>
+<tr id="row_2_5_69_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SizeVar.html" target="_self">SizeVar</a></td><td class="desc">Named variable represents a tensor index size </td></tr>
+<tr id="row_2_5_70_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SizeVarNode.html" target="_self">SizeVarNode</a></td><td class="desc">A variable node represent a tensor index size, whose value must be non-negative </td></tr>
+<tr id="row_2_5_71_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Stmt.html" target="_self">Stmt</a></td><td class="desc">Container of all statements </td></tr>
+<tr id="row_2_5_72_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtExprMutator.html" target="_self">StmtExprMutator</a></td><td class="desc">Mutator that recursively mutates stmts and exprs on them </td></tr>
+<tr id="row_2_5_73_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtExprVisitor.html" target="_self">StmtExprVisitor</a></td><td class="desc">Visitor that recursively visit stmts and exprs on them </td></tr>
+<tr id="row_2_5_74_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtFunctor.html" target="_self">StmtFunctor</a></td><td class="desc">Same as <a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">ExprFunctor</a> except it is a [...]
+<tr id="row_2_5_75_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8args_08_4.html" target="_self">StmtFunctor&lt; R(const Stmt &amp;n, Args...args)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_2_5_76_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html" target="_self">StmtMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html" title="StmtMutator that mutates the statements. ">StmtMutator</a> that mutates the statements </td></tr>
+<tr id="row_2_5_77_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtNode.html" target="_self">StmtNode</a></td><td class="desc">Base node of all statements </td></tr>
+<tr id="row_2_5_78_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html" target="_self">StmtVisitor</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html" title="StmtVisitor. ">StmtVisitor</a> </td></tr>
+<tr id="row_2_5_79_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StoreNode.html" target="_self">StoreNode</a></td><td class="desc">Store value to the buffer </td></tr>
+<tr id="row_2_5_80_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StringImm.html" target="_self">StringImm</a></td><td class="desc"></td></tr>
+<tr id="row_2_5_81_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StringImmNode.html" target="_self">StringImmNode</a></td><td class="desc">String constants, only used in asserts </td></tr>
+<tr id="row_2_5_82_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SubNode.html" target="_self">SubNode</a></td><td class="desc"><ul>
 <li>b </li>
 </ul>
 </td></tr>
-<tr id="row_2_5_80_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1tir_1_1TensorKey.html" target="_self">TensorKey</a></td><td class="desc">Auxiliary data structure used in IR Pass to indicate a tensor </td></tr>
-<tr id="row_2_5_81_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Var.html" target="_self">Var</a></td><td class="desc">Named variable in TVM </td></tr>
-<tr id="row_2_5_82_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1VarNode.html" target="_self">VarNode</a></td><td class="desc">A variable node in the IR </td></tr>
+<tr id="row_2_5_83_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1tir_1_1TensorKey.html" target="_self">TensorKey</a></td><td class="desc">Auxiliary data structure used in IR Pass to indicate a tensor </td></tr>
+<tr id="row_2_5_84_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Var.html" target="_self">Var</a></td><td class="desc">Named variable in TVM </td></tr>
+<tr id="row_2_5_85_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1VarNode.html" target="_self">VarNode</a></td><td class="desc">A variable node in the IR </td></tr>
 <tr id="row_2_6_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_2_6_" class="arrow" onclick="toggleFolder('2_6_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1transform.html" target="_self">transform</a></td><td class="desc"></td></tr>
 <tr id="row_2_6_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1Pass.html" target="_self">Pass</a></td><td class="desc"></td></tr>
 <tr id="row_2_6_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1PassContext.html" target="_self">PassContext</a></td><td class="desc"><a class="el" href="classtvm_1_1transform_1_1PassContext.html" title="PassContext that is used to configure the pass behavior. ">PassContext</a> that is used to configure the pass behavior </td></tr>
diff --git a/docs/doxygen/bias__add_8h_source.html b/docs/doxygen/bias__add_8h_source.html
index f61db49..7700470 100644
--- a/docs/doxygen/bias__add_8h_source.html
+++ b/docs/doxygen/bias__add_8h_source.html
@@ -95,7 +95,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="namespacetopi_html"><div class="ttname"><a href="namespacetopi.html">topi</a></div><div class="ttdef"><b>Definition:</b> broadcast.h:34</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1Tensor_html"><div class="ttname"><a href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a></div><div class="ttdoc">Tensor structure representing a possible input, or intermediate computation result. </div><div class="ttdef"><b>Definition:</b> tensor.h:52</div></div>
 <div class="ttc" id="operation_8h_html"><div class="ttname"><a href="operation_8h.html">operation.h</a></div><div class="ttdoc">Operation node can generate one or multiple Tensors. </div></div>
-<div class="ttc" id="namespacetopi_html_aa894e21659f08e663915f88a16f64b2d"><div class="ttname"><a href="namespacetopi.html#aa894e21659f08e663915f88a16f64b2d">topi::add</a></div><div class="ttdeci">tvm::PrimExpr add(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:196</div></div>
+<div class="ttc" id="namespacetopi_html_aa894e21659f08e663915f88a16f64b2d"><div class="ttname"><a href="namespacetopi.html#aa894e21659f08e663915f88a16f64b2d">topi::add</a></div><div class="ttdeci">tvm::PrimExpr add(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:209</div></div>
 <div class="ttc" id="tags_8h_html"><div class="ttname"><a href="tags_8h.html">tags.h</a></div><div class="ttdoc">External function interface to rocBLAS libraries. </div></div>
 <div class="ttc" id="broadcast_8h_html"><div class="ttname"><a href="broadcast_8h.html">broadcast.h</a></div><div class="ttdoc">Broadcast op constructions. </div></div>
 <div class="ttc" id="topi_2include_2topi_2transform_8h_html"><div class="ttname"><a href="topi_2include_2topi_2transform_8h.html">transform.h</a></div><div class="ttdoc">Transform op constructors. </div></div>
diff --git a/docs/doxygen/bound_8h.html b/docs/doxygen/bound_8h.html
index 5ff6839..b6711a7 100644
--- a/docs/doxygen/bound_8h.html
+++ b/docs/doxygen/bound_8h.html
@@ -135,9 +135,9 @@ Functions</h2></td></tr>
 <tr class="memitem:a3658d391f2333f405abdd54a9f174c9f"><td class="memItemLeft" align="right" valign="top">IntSet&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm_1_1arith.html#a3658d391f2333f405abdd54a9f174c9f">tvm::arith::DeduceBound</a> (PrimExpr v, PrimExpr cond, const std::unordered_map&lt; const VarNode *, IntSet &gt; &amp;hint_map, const std::unordered_map&lt; const VarNode *, IntSet &gt; &amp;relax_map)</td></tr>
 <tr class="memdesc:a3658d391f2333f405abdd54a9f174c9f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Same as DeduceBound with unordered_map signature.  <a href="namespacetvm_1_1arith.html#a3658d391f2333f405abdd54a9f174c9f">More...</a><br /></td></tr>
 <tr class="separator:a3658d391f2333f405abdd54a9f174c9f"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a7f55bb82049e1b5f3a22fbd7b7ef3570"><td class="memItemLeft" align="right" valign="top">Domain&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm_1_1arith.html#a7f55bb82049e1b5f3a22fbd7b7ef3570">tvm::arith::DomainTouched</a> (Stmt body, const te::Tensor &amp;tensor, bool consider_calls, bool consider_provides)</td></tr>
-<tr class="memdesc:a7f55bb82049e1b5f3a22fbd7b7ef3570"><td class="mdescLeft">&#160;</td><td class="mdescRight">Infer a regular domain that covers all the calls or provides within the given statement.  <a href="namespacetvm_1_1arith.html#a7f55bb82049e1b5f3a22fbd7b7ef3570">More...</a><br /></td></tr>
-<tr class="separator:a7f55bb82049e1b5f3a22fbd7b7ef3570"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:af183d4d8ee419f3df68d4be1f29edeea"><td class="memItemLeft" align="right" valign="top">Domain&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm_1_1arith.html#af183d4d8ee419f3df68d4be1f29edeea">tvm::arith::DomainTouched</a> (const Stmt &amp;body, const tir::Buffer &amp;buffer, bool consider_loads, bool consider_stores)</td></tr>
+<tr class="memdesc:af183d4d8ee419f3df68d4be1f29edeea"><td class="mdescLeft">&#160;</td><td class="mdescRight">Infer a regular domain that covers all the calls or provides within the given statement.  <a href="namespacetvm_1_1arith.html#af183d4d8ee419f3df68d4be1f29edeea">More...</a><br /></td></tr>
+<tr class="separator:af183d4d8ee419f3df68d4be1f29edeea"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
 <div class="textblock"><p>Bound deducers. </p>
diff --git a/docs/doxygen/bound_8h_source.html b/docs/doxygen/bound_8h_source.html
index be6aac1..2904d06 100644
--- a/docs/doxygen/bound_8h_source.html
+++ b/docs/doxygen/bound_8h_source.html
@@ -89,9 +89,8 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">bound.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="bound_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more contr [...]
+<a href="bound_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more contr [...]
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
-<div class="ttc" id="namespacetvm_1_1arith_html_a7f55bb82049e1b5f3a22fbd7b7ef3570"><div class="ttname"><a href="namespacetvm_1_1arith.html#a7f55bb82049e1b5f3a22fbd7b7ef3570">tvm::arith::DomainTouched</a></div><div class="ttdeci">Domain DomainTouched(Stmt body, const te::Tensor &amp;tensor, bool consider_calls, bool consider_provides)</div><div class="ttdoc">Infer a regular domain that covers all the calls or provides within the given statement. </div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TVM </div><div class="ttdef"><b>Definition:</b> var.h:85</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1VarNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1VarNode.html">tvm::tir::VarNode</a></div><div class="ttdoc">A variable node in the IR. </div><div class="ttdef"><b>Definition:</b> var.h:46</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_aa177c432dd27540d34b22c05559324ab"><div class="ttname"><a href="namespacetvm_1_1tir.html#aa177c432dd27540d34b22c05559324ab">tvm::tir::Domain</a></div><div class="ttdeci">Array&lt; Range &gt; Domain</div><div class="ttdef"><b>Definition:</b> var.h:263</div></div>
@@ -100,6 +99,8 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="tir_2expr_8h_html"><div class="ttname"><a href="tir_2expr_8h.html">expr.h</a></div><div class="ttdoc">TIR expressions. </div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Stmt_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Stmt.html">tvm::tir::Stmt</a></div><div class="ttdoc">Container of all statements. </div><div class="ttdef"><b>Definition:</b> stmt.h:47</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1Tensor_html"><div class="ttname"><a href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a></div><div class="ttdoc">Tensor structure representing a possible input, or intermediate computation result. </div><div class="ttdef"><b>Definition:</b> tensor.h:52</div></div>
+<div class="ttc" id="namespacetvm_1_1arith_html_af183d4d8ee419f3df68d4be1f29edeea"><div class="ttname"><a href="namespacetvm_1_1arith.html#af183d4d8ee419f3df68d4be1f29edeea">tvm::arith::DomainTouched</a></div><div class="ttdeci">Domain DomainTouched(const Stmt &amp;body, const tir::Buffer &amp;buffer, bool consider_loads, bool consider_stores)</div><div class="ttdoc">Infer a regular domain that covers all the calls or provides within the given statement. </div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1Buffer_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Buffer.html">tvm::tir::Buffer</a></div><div class="ttdoc">Buffer is a symbolic n-darray structure. It is a composition of primitive symbolic types...</div><div class="ttdef"><b>Definition:</b> buffer.h:53</div></div>
 <div class="ttc" id="node_2container_8h_html"><div class="ttname"><a href="node_2container_8h.html">container.h</a></div><div class="ttdoc">Array/Map container in the DSL graph. </div></div>
 <div class="ttc" id="namespacetvm_1_1arith_html_a3658d391f2333f405abdd54a9f174c9f"><div class="ttname"><a href="namespacetvm_1_1arith.html#a3658d391f2333f405abdd54a9f174c9f">tvm::arith::DeduceBound</a></div><div class="ttdeci">IntSet DeduceBound(PrimExpr v, PrimExpr cond, const std::unordered_map&lt; const VarNode *, IntSet &gt; &amp;hint_map, const std::unordered_map&lt; const VarNode *, IntSet &gt; &amp;relax_map)</div><div class="ttdoc">Same as DeduceBound with unordered_map signature [...]
 <div class="ttc" id="classtvm_1_1Map_html"><div class="ttname"><a href="classtvm_1_1Map.html">tvm::Map</a></div><div class="ttdoc">Map container of NodeRef-&gt;NodeRef in DSL graph. Map implements copy on write semantics, which means map is mutable but copy will happen when array is referenced in more than two places. </div><div class="ttdef"><b>Definition:</b> container.h:389</div></div>
diff --git a/docs/doxygen/broadcast_8h.html b/docs/doxygen/broadcast_8h.html
index 062257d..dadca50 100644
--- a/docs/doxygen/broadcast_8h.html
+++ b/docs/doxygen/broadcast_8h.html
@@ -161,6 +161,14 @@ Functions</h2></td></tr>
 <tr class="separator:af5aeed86f0e3adba1c78778e04692190"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a7eb1ec922ee948b74be83603601801df"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetopi.html#a7eb1ec922ee948b74be83603601801df">topi::operator||</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a> &amp;A, const <a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a> &amp;B)</td></tr>
 <tr class="separator:a7eb1ec922ee948b74be83603601801df"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1c4cd01e1e8b4657a63549f07b0dc27a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetopi.html#a1c4cd01e1e8b4657a63549f07b0dc27a">topi::logical_xor</a> (const <a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a> &amp;a, const <a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a> &amp;b)</td></tr>
+<tr class="separator:a1c4cd01e1e8b4657a63549f07b0dc27a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a7ace20e8054ddd729cd9b96a2dcbbb60"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetopi.html#a7ace20e8054ddd729cd9b96a2dcbbb60">topi::logical_xor</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a> &amp;A, const <a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a> &amp;B [...]
+<tr class="separator:a7ace20e8054ddd729cd9b96a2dcbbb60"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a53737d9a184a58b42fee00e9425a664b"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetopi.html#a53737d9a184a58b42fee00e9425a664b">topi::logical_xor</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a> &amp;A, const <a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a> &amp;B, std: [...]
+<tr class="separator:a53737d9a184a58b42fee00e9425a664b"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:adce89784aa81692ffe5eedfbd55100cb"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetopi.html#adce89784aa81692ffe5eedfbd55100cb">topi::logical_xor</a> (const <a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a> &amp;A, const <a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a> &amp;B, std: [...]
+<tr class="separator:adce89784aa81692ffe5eedfbd55100cb"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:adb0b26a18049c20f4c704ffdcfd9b26a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetopi.html#adb0b26a18049c20f4c704ffdcfd9b26a">topi::bitwise_and</a> (const <a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a> &amp;a, const <a class="el" href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a> &amp;b)</td></tr>
 <tr class="separator:adb0b26a18049c20f4c704ffdcfd9b26a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a4963bfc6eba6bab48c51bb8a1a492f0c"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetopi.html#a4963bfc6eba6bab48c51bb8a1a492f0c">topi::bitwise_and</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a> &amp;A, const <a class="el" href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a> &amp;B [...]
diff --git a/docs/doxygen/broadcast_8h_source.html b/docs/doxygen/broadcast_8h_source.html
index f8123b5..53954e8 100644
--- a/docs/doxygen/broadcast_8h_source.html
+++ b/docs/doxygen/broadcast_8h_source.html
@@ -89,50 +89,51 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">broadcast.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="broadcast_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more c [...]
-<div class="ttc" id="namespacetopi_html_a14301866288307e63c2ab414e67f6be3"><div class="ttname"><a href="namespacetopi.html#a14301866288307e63c2ab414e67f6be3">topi::right_shift</a></div><div class="ttdeci">tvm::PrimExpr right_shift(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:355</div></div>
+<a href="broadcast_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more c [...]
+<div class="ttc" id="namespacetopi_html_a14301866288307e63c2ab414e67f6be3"><div class="ttname"><a href="namespacetopi.html#a14301866288307e63c2ab414e67f6be3">topi::right_shift</a></div><div class="ttdeci">tvm::PrimExpr right_shift(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:368</div></div>
 <div class="ttc" id="namespacetopi_html_abab02d4352666a313c80a1cf73bdec26"><div class="ttname"><a href="namespacetopi.html#abab02d4352666a313c80a1cf73bdec26">topi::broadcast_to</a></div><div class="ttdeci">tvm::te::Tensor broadcast_to(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;output_shape, std::string name=&quot;T_broadcast_to&quot;, std::string tag=kBroadcast)</div><div class="ttdoc">Creates an operation that broadcasts a tensor into a compatible shape a [...]
 <div class="ttc" id="namespacetvm_1_1te_html_aeacae1afc9dd1267cbb5779f9daa4671"><div class="ttname"><a href="namespacetvm_1_1te.html#aeacae1afc9dd1267cbb5779f9daa4671">tvm::te::compute</a></div><div class="ttdeci">Tensor compute(Array&lt; PrimExpr &gt; shape, FCompute fcompute, std::string name=&quot;tensor&quot;, std::string tag=&quot;&quot;, Map&lt; std::string, ObjectRef &gt; attrs={})</div><div class="ttdoc">Construct a new tensor by computing over shape, using the computation rule:  [...]
 <div class="ttc" id="namespacetvm_html_a880f0cd15891153430b669f0b4eca2fe"><div class="ttname"><a href="namespacetvm.html#a880f0cd15891153430b669f0b4eca2fe">tvm::pow</a></div><div class="ttdeci">PrimExpr pow(PrimExpr x, PrimExpr y)</div><div class="ttdoc">Calculate power(x, y) </div></div>
 <div class="ttc" id="namespacetvm_html_ae3fd62ca6008d6f20e4845426ec47aa6"><div class="ttname"><a href="namespacetvm.html#ae3fd62ca6008d6f20e4845426ec47aa6">tvm::truncmod</a></div><div class="ttdeci">PrimExpr truncmod(PrimExpr a, PrimExpr b)</div><div class="ttdoc">compute the remainder of truncdiv </div></div>
 <div class="ttc" id="classtvm_1_1te_1_1TensorNode_html_a0ba732bc2def0d467854585752911351"><div class="ttname"><a href="classtvm_1_1te_1_1TensorNode.html#a0ba732bc2def0d467854585752911351">tvm::te::TensorNode::shape</a></div><div class="ttdeci">Array&lt; PrimExpr &gt; shape</div><div class="ttdoc">The shape of the tensor. </div><div class="ttdef"><b>Definition:</b> tensor.h:167</div></div>
-<div class="ttc" id="namespacetopi_html_adb0b26a18049c20f4c704ffdcfd9b26a"><div class="ttname"><a href="namespacetopi.html#adb0b26a18049c20f4c704ffdcfd9b26a">topi::bitwise_and</a></div><div class="ttdeci">tvm::PrimExpr bitwise_and(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:154</div></div>
-<div class="ttc" id="namespacetopi_html_a3927c7edf09081a9520e0d628b3cff5a"><div class="ttname"><a href="namespacetopi.html#a3927c7edf09081a9520e0d628b3cff5a">topi::floor_divide</a></div><div class="ttdeci">tvm::PrimExpr floor_divide(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:257</div></div>
-<div class="ttc" id="namespacetopi_html_afab96b6fc623d4bbb98bd381435d4dea"><div class="ttname"><a href="namespacetopi.html#afab96b6fc623d4bbb98bd381435d4dea">topi::minimum</a></div><div class="ttdeci">tvm::PrimExpr minimum(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:315</div></div>
-<div class="ttc" id="namespacetopi_html_a7ca4a60628a697c7e589d2fe78984650"><div class="ttname"><a href="namespacetopi.html#a7ca4a60628a697c7e589d2fe78984650">topi::less_equal</a></div><div class="ttdeci">tvm::PrimExpr less_equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:434</div></div>
+<div class="ttc" id="namespacetopi_html_adb0b26a18049c20f4c704ffdcfd9b26a"><div class="ttname"><a href="namespacetopi.html#adb0b26a18049c20f4c704ffdcfd9b26a">topi::bitwise_and</a></div><div class="ttdeci">tvm::PrimExpr bitwise_and(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:167</div></div>
+<div class="ttc" id="namespacetopi_html_a3927c7edf09081a9520e0d628b3cff5a"><div class="ttname"><a href="namespacetopi.html#a3927c7edf09081a9520e0d628b3cff5a">topi::floor_divide</a></div><div class="ttdeci">tvm::PrimExpr floor_divide(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:270</div></div>
+<div class="ttc" id="namespacetopi_html_afab96b6fc623d4bbb98bd381435d4dea"><div class="ttname"><a href="namespacetopi.html#afab96b6fc623d4bbb98bd381435d4dea">topi::minimum</a></div><div class="ttdeci">tvm::PrimExpr minimum(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:328</div></div>
+<div class="ttc" id="namespacetopi_html_a7ca4a60628a697c7e589d2fe78984650"><div class="ttname"><a href="namespacetopi.html#a7ca4a60628a697c7e589d2fe78984650">topi::less_equal</a></div><div class="ttdeci">tvm::PrimExpr less_equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:447</div></div>
 <div class="ttc" id="constant__utils_8h_html"><div class="ttname"><a href="constant__utils_8h.html">constant_utils.h</a></div><div class="ttdoc">Utility functions for handling constants in TVM expressions. </div></div>
-<div class="ttc" id="namespacetopi_html_aae35ca7f2b19e58bfa1fe5af449380d1"><div class="ttname"><a href="namespacetopi.html#aae35ca7f2b19e58bfa1fe5af449380d1">topi::subtract</a></div><div class="ttdeci">tvm::PrimExpr subtract(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:210</div></div>
+<div class="ttc" id="namespacetopi_html_aae35ca7f2b19e58bfa1fe5af449380d1"><div class="ttname"><a href="namespacetopi.html#aae35ca7f2b19e58bfa1fe5af449380d1">topi::subtract</a></div><div class="ttdeci">tvm::PrimExpr subtract(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:223</div></div>
 <div class="ttc" id="namespacetopi_html"><div class="ttname"><a href="namespacetopi.html">topi</a></div><div class="ttdef"><b>Definition:</b> broadcast.h:34</div></div>
 <div class="ttc" id="namespacetvm_html_a9da780393e228969f77aa7550520a582"><div class="ttname"><a href="namespacetvm.html#a9da780393e228969f77aa7550520a582">tvm::min</a></div><div class="ttdeci">PrimExpr min(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take minimum of two values </div></div>
 <div class="ttc" id="namespacetopi_html_a663f04b71464b8bb32a9498a71733200"><div class="ttname"><a href="namespacetopi.html#a663f04b71464b8bb32a9498a71733200">topi::logical_and</a></div><div class="ttdeci">tvm::PrimExpr logical_and(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:126</div></div>
 <div class="ttc" id="classtvm_1_1Array_html"><div class="ttname"><a href="classtvm_1_1Array.html">tvm::Array&lt; tvm::PrimExpr &gt;</a></div></div>
 <div class="ttc" id="namespacetopi_html_acd86f6a79bf0f7cdb1296ae7829f8307"><div class="ttname"><a href="namespacetopi.html#acd86f6a79bf0f7cdb1296ae7829f8307">topi::floor</a></div><div class="ttdeci">Tensor floor(const Tensor &amp;x, std::string name=&quot;T_&quot;&quot;floor&quot;, std::string tag=kElementWise)</div><div class="ttdef"><b>Definition:</b> elemwise.h:53</div></div>
+<div class="ttc" id="namespacetopi_html_a1c4cd01e1e8b4657a63549f07b0dc27a"><div class="ttname"><a href="namespacetopi.html#a1c4cd01e1e8b4657a63549f07b0dc27a">topi::logical_xor</a></div><div class="ttdeci">tvm::PrimExpr logical_xor(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:154</div></div>
 <div class="ttc" id="namespacetvm_html_a25cb3bf64427a16a93529d0a5c40b6de"><div class="ttname"><a href="namespacetvm.html#a25cb3bf64427a16a93529d0a5c40b6de">tvm::floordiv</a></div><div class="ttdeci">PrimExpr floordiv(PrimExpr a, PrimExpr b)</div><div class="ttdoc">compute floor(a / b) </div></div>
 <div class="ttc" id="namespacetopi_html_a794b9155e9ba9d1c9c42a1cff1fb645f"><div class="ttname"><a href="namespacetopi.html#a794b9155e9ba9d1c9c42a1cff1fb645f">topi::kBroadcast</a></div><div class="ttdeci">constexpr auto kBroadcast</div><div class="ttdef"><b>Definition:</b> tags.h:35</div></div>
-<div class="ttc" id="namespacetopi_html_a8739fc442527f4063babae06a9b98c82"><div class="ttname"><a href="namespacetopi.html#a8739fc442527f4063babae06a9b98c82">topi::bitwise_or</a></div><div class="ttdeci">tvm::PrimExpr bitwise_or(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:168</div></div>
-<div class="ttc" id="namespacetopi_html_a83fe39f90c5576ae3060e8a5489d2f39"><div class="ttname"><a href="namespacetopi.html#a83fe39f90c5576ae3060e8a5489d2f39">topi::floor_mod</a></div><div class="ttdeci">tvm::PrimExpr floor_mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:289</div></div>
-<div class="ttc" id="namespacetopi_html_a1b031e71272376edbdde858787b91b34"><div class="ttname"><a href="namespacetopi.html#a1b031e71272376edbdde858787b91b34">topi::greater_equal</a></div><div class="ttdeci">tvm::PrimExpr greater_equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:421</div></div>
+<div class="ttc" id="namespacetopi_html_a8739fc442527f4063babae06a9b98c82"><div class="ttname"><a href="namespacetopi.html#a8739fc442527f4063babae06a9b98c82">topi::bitwise_or</a></div><div class="ttdeci">tvm::PrimExpr bitwise_or(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:181</div></div>
+<div class="ttc" id="namespacetopi_html_a83fe39f90c5576ae3060e8a5489d2f39"><div class="ttname"><a href="namespacetopi.html#a83fe39f90c5576ae3060e8a5489d2f39">topi::floor_mod</a></div><div class="ttdeci">tvm::PrimExpr floor_mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:302</div></div>
+<div class="ttc" id="namespacetopi_html_a1b031e71272376edbdde858787b91b34"><div class="ttname"><a href="namespacetopi.html#a1b031e71272376edbdde858787b91b34">topi::greater_equal</a></div><div class="ttdeci">tvm::PrimExpr greater_equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:434</div></div>
 <div class="ttc" id="broadcast_8h_html_acb867d9c8250a748f59b5a47711b495a"><div class="ttname"><a href="broadcast_8h.html#acb867d9c8250a748f59b5a47711b495a">TOPI_DEFINE_BCAST_OP</a></div><div class="ttdeci">#define TOPI_DEFINE_BCAST_OP(Name, ComputeRule)</div><div class="ttdef"><b>Definition:</b> broadcast.h:69</div></div>
 <div class="ttc" id="broadcast_8h_html_a7f3943bdaba8836ffa7d14457a7d730c"><div class="ttname"><a href="broadcast_8h.html#a7f3943bdaba8836ffa7d14457a7d730c">TOPI_DEFINE_OP_OVERLOAD</a></div><div class="ttdeci">#define TOPI_DEFINE_OP_OVERLOAD(Name, OpName)</div><div class="ttdef"><b>Definition:</b> broadcast.h:101</div></div>
 <div class="ttc" id="namespacetvm_html_a40fcc9952e1ff01a76f3b75dbd368fc1"><div class="ttname"><a href="namespacetvm.html#a40fcc9952e1ff01a76f3b75dbd368fc1">tvm::div</a></div><div class="ttdeci">PrimExpr div(PrimExpr a, PrimExpr b)</div><div class="ttdoc">compute division in C semantics. </div></div>
-<div class="ttc" id="namespacetopi_html_ab98c1bef2250b920060a8d92f2db2d93"><div class="ttname"><a href="namespacetopi.html#ab98c1bef2250b920060a8d92f2db2d93">topi::divide</a></div><div class="ttdeci">tvm::PrimExpr divide(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:238</div></div>
+<div class="ttc" id="namespacetopi_html_ab98c1bef2250b920060a8d92f2db2d93"><div class="ttname"><a href="namespacetopi.html#ab98c1bef2250b920060a8d92f2db2d93">topi::divide</a></div><div class="ttdeci">tvm::PrimExpr divide(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:251</div></div>
 <div class="ttc" id="namespacetvm_html_ab49bad0808ba033343e72ba37b39af2e"><div class="ttname"><a href="namespacetvm.html#ab49bad0808ba033343e72ba37b39af2e">tvm::max</a></div><div class="ttdeci">PrimExpr max(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take maximum of two values </div></div>
-<div class="ttc" id="namespacetopi_html_a78a4d7cbe5668db729e1acf831c79034"><div class="ttname"><a href="namespacetopi.html#a78a4d7cbe5668db729e1acf831c79034">topi::left_shift</a></div><div class="ttdeci">tvm::PrimExpr left_shift(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:341</div></div>
-<div class="ttc" id="namespacetopi_html_ab32a7a76e3a94a1edba52436a1337cb6"><div class="ttname"><a href="namespacetopi.html#ab32a7a76e3a94a1edba52436a1337cb6">topi::mod</a></div><div class="ttdeci">tvm::PrimExpr mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:270</div></div>
+<div class="ttc" id="namespacetopi_html_a78a4d7cbe5668db729e1acf831c79034"><div class="ttname"><a href="namespacetopi.html#a78a4d7cbe5668db729e1acf831c79034">topi::left_shift</a></div><div class="ttdeci">tvm::PrimExpr left_shift(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:354</div></div>
+<div class="ttc" id="namespacetopi_html_ab32a7a76e3a94a1edba52436a1337cb6"><div class="ttname"><a href="namespacetopi.html#ab32a7a76e3a94a1edba52436a1337cb6">topi::mod</a></div><div class="ttdeci">tvm::PrimExpr mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:283</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1Tensor_html"><div class="ttname"><a href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a></div><div class="ttdoc">Tensor structure representing a possible input, or intermediate computation result. </div><div class="ttdef"><b>Definition:</b> tensor.h:52</div></div>
 <div class="ttc" id="namespacetopi_html_ac1866543337af365604bc7215a7cf30b"><div class="ttname"><a href="namespacetopi.html#ac1866543337af365604bc7215a7cf30b">topi::logical_or</a></div><div class="ttdeci">tvm::PrimExpr logical_or(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:140</div></div>
-<div class="ttc" id="namespacetopi_html_a9bdc92081dabc451c196060d72aab223"><div class="ttname"><a href="namespacetopi.html#a9bdc92081dabc451c196060d72aab223">topi::greater</a></div><div class="ttdeci">tvm::PrimExpr greater(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:369</div></div>
-<div class="ttc" id="namespacetopi_html_aa894e21659f08e663915f88a16f64b2d"><div class="ttname"><a href="namespacetopi.html#aa894e21659f08e663915f88a16f64b2d">topi::add</a></div><div class="ttdeci">tvm::PrimExpr add(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:196</div></div>
+<div class="ttc" id="namespacetopi_html_a9bdc92081dabc451c196060d72aab223"><div class="ttname"><a href="namespacetopi.html#a9bdc92081dabc451c196060d72aab223">topi::greater</a></div><div class="ttdeci">tvm::PrimExpr greater(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:382</div></div>
+<div class="ttc" id="namespacetopi_html_aa894e21659f08e663915f88a16f64b2d"><div class="ttname"><a href="namespacetopi.html#aa894e21659f08e663915f88a16f64b2d">topi::add</a></div><div class="ttdeci">tvm::PrimExpr add(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:209</div></div>
 <div class="ttc" id="namespacetvm_html_a74b2e0fb912a82f974e2b652f7de8d34"><div class="ttname"><a href="namespacetvm.html#a74b2e0fb912a82f974e2b652f7de8d34">tvm::floormod</a></div><div class="ttdeci">PrimExpr floormod(PrimExpr a, PrimExpr b)</div><div class="ttdoc">compute the remainder of floordiv </div></div>
-<div class="ttc" id="namespacetopi_html_ae10e1fb8c765a9ae15c12f45199375b3"><div class="ttname"><a href="namespacetopi.html#ae10e1fb8c765a9ae15c12f45199375b3">topi::equal</a></div><div class="ttdeci">tvm::PrimExpr equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:395</div></div>
-<div class="ttc" id="namespacetopi_html_ac3e9e4fa02b05853c13be69ec595aad7"><div class="ttname"><a href="namespacetopi.html#ac3e9e4fa02b05853c13be69ec595aad7">topi::multiply</a></div><div class="ttdeci">tvm::PrimExpr multiply(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:224</div></div>
-<div class="ttc" id="namespacetopi_html_a19da66c62c01b4c51d8db2e35b075b09"><div class="ttname"><a href="namespacetopi.html#a19da66c62c01b4c51d8db2e35b075b09">topi::not_equal</a></div><div class="ttdeci">tvm::PrimExpr not_equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:408</div></div>
+<div class="ttc" id="namespacetopi_html_ae10e1fb8c765a9ae15c12f45199375b3"><div class="ttname"><a href="namespacetopi.html#ae10e1fb8c765a9ae15c12f45199375b3">topi::equal</a></div><div class="ttdeci">tvm::PrimExpr equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:408</div></div>
+<div class="ttc" id="namespacetopi_html_ac3e9e4fa02b05853c13be69ec595aad7"><div class="ttname"><a href="namespacetopi.html#ac3e9e4fa02b05853c13be69ec595aad7">topi::multiply</a></div><div class="ttdeci">tvm::PrimExpr multiply(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:237</div></div>
+<div class="ttc" id="namespacetopi_html_a19da66c62c01b4c51d8db2e35b075b09"><div class="ttname"><a href="namespacetopi.html#a19da66c62c01b4c51d8db2e35b075b09">topi::not_equal</a></div><div class="ttdeci">tvm::PrimExpr not_equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:421</div></div>
 <div class="ttc" id="tags_8h_html"><div class="ttname"><a href="tags_8h.html">tags.h</a></div><div class="ttdoc">External function interface to rocBLAS libraries. </div></div>
-<div class="ttc" id="namespacetopi_html_a6b003a9c4154ba0040d916918eecb7fe"><div class="ttname"><a href="namespacetopi.html#a6b003a9c4154ba0040d916918eecb7fe">topi::less</a></div><div class="ttdeci">tvm::PrimExpr less(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:382</div></div>
-<div class="ttc" id="namespacetopi_html_a09126c7373a571b8f24e8206c24e6bf5"><div class="ttname"><a href="namespacetopi.html#a09126c7373a571b8f24e8206c24e6bf5">topi::maximum</a></div><div class="ttdeci">tvm::PrimExpr maximum(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:302</div></div>
+<div class="ttc" id="namespacetopi_html_a6b003a9c4154ba0040d916918eecb7fe"><div class="ttname"><a href="namespacetopi.html#a6b003a9c4154ba0040d916918eecb7fe">topi::less</a></div><div class="ttdeci">tvm::PrimExpr less(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:395</div></div>
+<div class="ttc" id="namespacetopi_html_a09126c7373a571b8f24e8206c24e6bf5"><div class="ttname"><a href="namespacetopi.html#a09126c7373a571b8f24e8206c24e6bf5">topi::maximum</a></div><div class="ttdeci">tvm::PrimExpr maximum(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:315</div></div>
 <div class="ttc" id="detail_2broadcast_8h_html"><div class="ttname"><a href="detail_2broadcast_8h.html">broadcast.h</a></div><div class="ttdoc">Detail broadcast. </div></div>
-<div class="ttc" id="namespacetopi_html_a042879ca152f316c9d19f941dbdf7582"><div class="ttname"><a href="namespacetopi.html#a042879ca152f316c9d19f941dbdf7582">topi::power</a></div><div class="ttdeci">tvm::PrimExpr power(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:328</div></div>
-<div class="ttc" id="namespacetopi_html_ad8c706a823afc470753cabb9007925ec"><div class="ttname"><a href="namespacetopi.html#ad8c706a823afc470753cabb9007925ec">topi::bitwise_xor</a></div><div class="ttdeci">tvm::PrimExpr bitwise_xor(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:182</div></div>
+<div class="ttc" id="namespacetopi_html_a042879ca152f316c9d19f941dbdf7582"><div class="ttname"><a href="namespacetopi.html#a042879ca152f316c9d19f941dbdf7582">topi::power</a></div><div class="ttdeci">tvm::PrimExpr power(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:341</div></div>
+<div class="ttc" id="namespacetopi_html_ad8c706a823afc470753cabb9007925ec"><div class="ttname"><a href="namespacetopi.html#ad8c706a823afc470753cabb9007925ec">topi::bitwise_xor</a></div><div class="ttdeci">tvm::PrimExpr bitwise_xor(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:195</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/doxygen/buffer_8h_source.html b/docs/doxygen/buffer_8h_source.html
index 65536b2..ae426cb 100644
--- a/docs/doxygen/buffer_8h_source.html
+++ b/docs/doxygen/buffer_8h_source.html
@@ -130,7 +130,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="namespacetvm_1_1tir_html_a9ac05a14db42ca73da1d3945e7ce2fd1a4500370408bf0077c86a795cbe9dbf39"><div class="ttname"><a href="namespacetvm_1_1tir.html#a9ac05a14db42ca73da1d3945e7ce2fd1a4500370408bf0077c86a795cbe9dbf39">tvm::tir::kDefault</a></div><div class="ttdef"><b>Definition:</b> buffer.h:43</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Buffer_html_ac033e5ebb29588c789152fb01dc99eff"><div class="ttname"><a href="classtvm_1_1tir_1_1Buffer.html#ac033e5ebb29588c789152fb01dc99eff">tvm::tir::Buffer::Buffer</a></div><div class="ttdeci">Buffer(ObjectPtr&lt; Object &gt; n)</div><div class="ttdef"><b>Definition:</b> buffer.h:56</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BufferNode_html_aac30fc17abe8bde34272a854ba74b16a"><div class="ttname"><a href="classtvm_1_1tir_1_1BufferNode.html#aac30fc17abe8bde34272a854ba74b16a">tvm::tir::BufferNode::data_alignment</a></div><div class="ttdeci">int data_alignment</div><div class="ttdoc">Alignment requirement of data pointer in bytes. </div><div class="ttdef"><b>Definition:</b> buffer.h:131</div></div>
-<div class="ttc" id="namespacetopi_html_ae10e1fb8c765a9ae15c12f45199375b3"><div class="ttname"><a href="namespacetopi.html#ae10e1fb8c765a9ae15c12f45199375b3">topi::equal</a></div><div class="ttdeci">tvm::PrimExpr equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:395</div></div>
+<div class="ttc" id="namespacetopi_html_ae10e1fb8c765a9ae15c12f45199375b3"><div class="ttname"><a href="namespacetopi.html#ae10e1fb8c765a9ae15c12f45199375b3">topi::equal</a></div><div class="ttdeci">tvm::PrimExpr equal(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:408</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BufferNode_html_a9b849acfdc74712f4d2bfd9e631ee398"><div class="ttname"><a href="classtvm_1_1tir_1_1BufferNode.html#a9b849acfdc74712f4d2bfd9e631ee398">tvm::tir::BufferNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const BufferNode *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> buffer.h:155</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_a9ac05a14db42ca73da1d3945e7ce2fd1a08cd53d61aa635081a2f7a0597025663"><div class="ttname"><a href="namespacetvm_1_1tir.html#a9ac05a14db42ca73da1d3945e7ce2fd1a08cd53d61aa635081a2f7a0597025663">tvm::tir::kAutoBroadcast</a></div><div class="ttdef"><b>Definition:</b> buffer.h:45</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_a9b3692f3f081d3b5b84a24cf8082141e"><div class="ttname"><a href="namespacetvm_1_1tir.html#a9b3692f3f081d3b5b84a24cf8082141e">tvm::tir::decl_buffer</a></div><div class="ttdeci">Buffer decl_buffer(Array&lt; PrimExpr &gt; shape, DataType dtype=DataType::Float(32), std::string name=&quot;buffer&quot;)</div><div class="ttdoc">Construct a new buffer given shape, and dtype. </div></div>
diff --git a/docs/doxygen/c__runtime__api_8h_source.html b/docs/doxygen/c__runtime__api_8h_source.html
index c3ceb23..42fb365 100644
--- a/docs/doxygen/c__runtime__api_8h_source.html
+++ b/docs/doxygen/c__runtime__api_8h_source.html
@@ -147,7 +147,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="c__runtime__api_8h_html_acf57d257a6e0841d84ebbd2a339d183e"><div class="ttname"><a href="c__runtime__api_8h.html#acf57d257a6e0841d84ebbd2a339d183e">TVMFunctionHandle</a></div><div class="ttdeci">void * TVMFunctionHandle</div><div class="ttdoc">Handle to packed function handle. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:155</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_ad3cce90e23260e30424e9878d9798d33a6aa6b0afb212bea097b2c71eb366214c"><div class="ttname"><a href="c__runtime__api_8h.html#ad3cce90e23260e30424e9878d9798d33a6aa6b0afb212bea097b2c71eb366214c">kTVMNNVMFirst</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:113</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_ace8007daffd9f2c6d954c24d870bfcc4"><div class="ttname"><a href="c__runtime__api_8h.html#ace8007daffd9f2c6d954c24d870bfcc4">tvm_index_t</a></div><div class="ttdeci">int64_t tvm_index_t</div><div class="ttdoc">type of array index. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:77</div></div>
-<div class="ttc" id="namespacetopi_html_ab32a7a76e3a94a1edba52436a1337cb6"><div class="ttname"><a href="namespacetopi.html#ab32a7a76e3a94a1edba52436a1337cb6">topi::mod</a></div><div class="ttdeci">tvm::PrimExpr mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:270</div></div>
+<div class="ttc" id="namespacetopi_html_ab32a7a76e3a94a1edba52436a1337cb6"><div class="ttname"><a href="namespacetopi.html#ab32a7a76e3a94a1edba52436a1337cb6">topi::mod</a></div><div class="ttdeci">tvm::PrimExpr mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:283</div></div>
 <div class="ttc" id="unionTVMValue_html_a75af73d1e39bc5de7073a008d44b0d16"><div class="ttname"><a href="unionTVMValue.html#a75af73d1e39bc5de7073a008d44b0d16">TVMValue::v_type</a></div><div class="ttdeci">DLDataType v_type</div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:139</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_ad3cce90e23260e30424e9878d9798d33a4b2f3688d719f6e7c20ab00659e2383e"><div class="ttname"><a href="c__runtime__api_8h.html#ad3cce90e23260e30424e9878d9798d33a4b2f3688d719f6e7c20ab00659e2383e">kTVMBytes</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:105</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a9363bb701f16ce5bbb381f2a013d25b4"><div class="ttname"><a href="c__runtime__api_8h.html#a9363bb701f16ce5bbb381f2a013d25b4">TVMContext</a></div><div class="ttdeci">DLContext TVMContext</div><div class="ttdoc">The Device information, abstract away common device types. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:125</div></div>
diff --git a/docs/doxygen/classes.html b/docs/doxygen/classes.html
index a42272c..171cda8 100644
--- a/docs/doxygen/classes.html
+++ b/docs/doxygen/classes.html
@@ -90,135 +90,136 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="qindex"><a class="qindex" href="#letter_A">A</a>&#160;|&#160;<a class="qindex" href="#letter_B">B</a>&#160;|&#160;<a class="qindex" href="#letter_C">C</a>&#160;|&#160;<a class="qindex" href="#letter_D">D</a>&#160;|&#160;<a class="qindex" href="#letter_E">E</a>&#160;|&#160;<a class="qindex" href="#letter_F">F</a>&#160;|&#160;<a class="qindex" href="#letter_G">G</a>&#160;|&#160;<a class="qindex" href="#letter_H">H</a>&#160;|&#160;<a class="qindex" href="#letter_I">I</a>&#160;|& [...]
 <table class="classindex">
 <tr><td rowspan="2" valign="bottom"><a name="letter_A"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;A&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html">Conv3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetAnalyzer.html">IntSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_ [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html">Conv3DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetNode.html">IntSetNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html">PatternMutator</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool2DAttrs.html">AdaptivePool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html">ConvWinogradWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModule.html">IRModu [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool3DAttrs.html">AdaptivePool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CropAndResizeAttrs.html">CropAndResizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModuleNode.html">IRModuleNode</a> (<a class="el [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AddNode.html">AddNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_D"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;D&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1IterAdapter.html">IterAdapter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternTupleNode.html">PatternTupleNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html">StmtVisitor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::ti [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADT.html">ADT</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVar.html">IterVar</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>) [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADTObj.html">ADTObj</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DataType.html">DataType</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttr.html">IterVarAttr</a> (<a class="el" href="namespacetvm_1_1te.html" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateNode.html">AllocateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DebugAttrs.html">DebugAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttrNode.html">IterVarAttrNode</a> (<a class="el" href="namespacetvm_1_1t [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocStorageAttrs.html">AllocStorageAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeformableConv2DAttrs.html">DeformableConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVarNode.html">IterVarNode</a> (<a cl [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocTensorAttrs.html">AllocTensorAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DenseAttrs.html">DenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelation.html">IterVarRelation</a> (<a class="el" href="na [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1Analyzer.html">Analyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DeviceAPI.html">DeviceAPI</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelationNode.html">IterVarRelationNode</a> (<a class="el" href="namespac [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AndNode.html">AndNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeviceCopyAttrs.html">DeviceCopyAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_L"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160; [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1PointerType.html">PointerType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1StrMapNode.html">StrMapNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AnyNode.html">AnyNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrs.html">DictAttrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PointerTypeNode.html">PointerTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign= [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArangeAttrs.html">ArangeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrsNode.html">DictAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html">L2NormalizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArgsortAttrs.html">ArgsortAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html">Dilation2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayerNormAttrs.html">LayerNormAttrs</a> (<a class="el" href [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1Array.html">Array</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DivNode.html">DivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Layout.html">Layout</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top">< [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1ArrayHandler.html">SimpleObjAllocator::ArrayHandler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html">DropoutAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LayoutAxis.html">Layo [...]
+</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html">Conv2DWinogradNNPACKWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSet.html">IntSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_0 [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html">Conv3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetAnalyzer.html">IntSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html">PatternMutator</a> (<a class="el" href="name [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool2DAttrs.html">AdaptivePool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html">Conv3DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetNode.html">IntSetNode</a> (<a cl [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool3DAttrs.html">AdaptivePool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html">ConvWinogradWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModule.html">IRModu [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AddNode.html">AddNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CropAndResizeAttrs.html">CropAndResizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModuleNode.html">IRModuleNode</a> (<a class="el" href="namespacetvm.html">tvm< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADT.html">ADT</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_D"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;D&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1IterAdapter.html">IterAdapter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1StoreNode.html">StoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#16 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADTObj.html">ADTObj</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVar.html">IterVar</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVarNode.html">PatternVarNode</a> (<a class="el" href="namespacetvm_1_1relay.html">t [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateNode.html">AllocateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DataType.html">DataType</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttr.html">IterVarAttr</a> (<a class="el" href="namespacetvm_1_1te.html" [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocStorageAttrs.html">AllocStorageAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DebugAttrs.html">DebugAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttrNode.html">IterVarAttrNode</a> (<a class="el" href=" [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocTensorAttrs.html">AllocTensorAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeformableConv2DAttrs.html">DeformableConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVarNode.html">IterVarNode</a> (<a clas [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1Analyzer.html">Analyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DenseAttrs.html">DenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelation.html">IterVarRelation</a> (<a class="el" href="namespacetvm_1_1te. [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AndNode.html">AndNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DeviceAPI.html">DeviceAPI</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelationNode.html">IterVarRelationNode</a> (<a class="el" href="namespacetvm_1_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AnyNode.html">AnyNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeviceCopyAttrs.html">DeviceCopyAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_L"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160; [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1PointerTypeNode.html">PointerTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1StructuralEqual.html">StructuralEqual</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArangeAttrs.html">ArangeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrs.html">DictAttrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html">Prefetch</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&# [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArgsortAttrs.html">ArgsortAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrsNode.html">DictAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html">L2NormalizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.ht [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1Array.html">Array</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html">Dilation2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayerNormAttrs.html">LayerNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1ArrayHandler.html">SimpleObjAllocator::ArrayHandler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DivNode.html">DivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Layout.html">Layout</a> (<a class="el" [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1ArrayNode.html">ArrayNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_E"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;E&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LayoutNode.html">LayoutNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1PrimFunc.html">PrimFunc</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmtNode.html">AssertStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html">LayoutTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html">PrimFuncNode</a> (<a class="el" href [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocEntry.html">AttrDocEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFunc.html">EnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html">LeakyReluAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocVisitor.html">AttrDocVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFuncNode.html">EnvFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1LENode.html">LENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160 [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1AttrError.html">AttrError</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQNode.html">EQNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign= [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrExistVisitor.html">AttrExistVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Error.html">Error</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetNode.html">LetNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160; [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfo.html">AttrFieldInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1ErrorBuilder.html">ErrorBuilder</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetNode.html">LetNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rows [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfoNode.html">AttrFieldInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ErrorReporter.html">ErrorReporter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html">LetStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&# [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrInitEntry.html">AttrInitEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html">EvaluateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoadNode.html">LoadNode</a> (<a class="el" href="namespacetvm_1_1tir.h [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrInitVisitor.html">AttrInitVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Executable.html">Executable</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html">LRNAttrs</a> (<a class="e [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TensorIntrin.html">TensorIntrin</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNonDefaultVisitor.html">AttrNonDefaultVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ExpandDimsAttrs.html">ExpandDimsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LTNode.html">LTNode</a> (<a class="el" hre [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrNopEntry.html">AttrNopEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1ExprDeepEqual.html">ExprDeepEqual</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_M"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class=" [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1RampNode.html">RampNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TensorIntrinCallNode.html">TensorIntrinCallNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNormalVisitor.html">AttrNormalVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Range.html">Range</a> (<a class="el" href="namespacetvm.html">tvm< [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1Attrs.html">Attrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Map.html">Map</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrsNode.html">AttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MapNode.html">MapNode</a [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html">AttrsSEqualVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSHashVisitor.html">AttrsSHashVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MatchNode.html">MatchNode</a> (<a class="el" href="namesp [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html">AttrStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MaxNode.html">MaxNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry.html">AttrTriggerNonDefaultEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html">ExprRewriter</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html">MaxPool1DAttrs [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html">MaxPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::re [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool1DAttrs.html">AvgPool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html">MaxPool3DAttrs</a> (<a class="el" href="namespace [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool2DAttrs.html">AvgPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1ExternOpNode.html">ExternOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfo.html">MemoryInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)& [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool3DAttrs.html">AvgPool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_F"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;F&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfoNode.html">MemoryInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ReflectionTrait.html">ReflectionTrait</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1TupleGetItem.html">TupleGetItem</a> (<a class="el" href="namespacetvm_1_1relay [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ArrayNode.html">ArrayNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html">DropoutAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LayoutAxis.html">LayoutAxis</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&# [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmtNode.html">AssertStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_E"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;E&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LayoutNode.html">LayoutNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1PrimFunc.html">PrimFunc</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1TakeAttrs.html">TakeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocEntry.html">AttrDocEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html">LayoutTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html">PrimFuncNode</a> (<a class="el" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocVisitor.html">AttrDocVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFunc.html">EnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html">LeakyReluAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::r [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1AttrError.html">AttrError</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFuncNode.html">EnvFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1LENode.html">LENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrExistVisitor.html">AttrExistVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQNode.html">EQNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::rel [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfo.html">AttrFieldInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Error.html">Error</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetNode.html">LetNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfoNode.html">AttrFieldInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1ErrorBuilder.html">ErrorBuilder</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetNode.html">LetNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td> [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TensorComputeOpNode.html">TensorComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrInitEntry.html">AttrInitEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ErrorReporter.html">ErrorReporter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html">LetStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::t [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrInitVisitor.html">AttrInitVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html">EvaluateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoadNode.html">LoadNode</a> (<a class="el" href="namespacetvm_1_1ti [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNonDefaultVisitor.html">AttrNonDefaultVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Executable.html">Executable</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html">LRNAttrs</a>  [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TensorIntrinCall.html">TensorIntrinCall</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrNopEntry.html">AttrNopEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ExpandDimsAttrs.html">ExpandDimsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LTNode.html">LTNode</a> (<a class="el" href="namespacetvm_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNormalVisitor.html">AttrNormalVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1ExprDeepEqual.html">ExprDeepEqual</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_M"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><di [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1RampNode.html">RampNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TensorIntrinNode.html">TensorIntrinNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1Attrs.html">Attrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Range.html">Range</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrsNode.html">AttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Map.html">Map</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class=" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html">AttrsSEqualVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class=" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSHashVisitor.html">AttrsSHashVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class=" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html">AttrStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MatchNode.html">MatchNode</a> (<a class="el" href="namespacetvm_1_1relay.htm [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry.html">AttrTriggerNonDefaultEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MaxNode.html">MaxNode</a> (<a class="el" href= [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html">ExprRewriter</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html">MaxPool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm:: [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool1DAttrs.html">AvgPool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html">MaxPool2DAttrs</a> (<a class="el" href="nam [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool2DAttrs.html">AvgPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html">MaxPool3DAttrs</a> (<a class="el" href="namespace [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool3DAttrs.html">AvgPool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1ExternOpNode.html">ExternOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfo.html">MemoryInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)& [...]
 <tr><td rowspan="2" valign="bottom"><a name="letter_B"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;B&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MinNode.html">MinNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ReflectionVTable.html">ReflectionVTable</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1TupleGetItemNode.html">TupleGetItemNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::re [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FeatureSet.html">FeatureSet</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html">MirrorPadAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1RefRead.html">RefRead</a> (<a class="el" href="namespacetvm_1_1rela [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseAttrsNode.html">BaseAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1FIFOBufferAttrs.html">FIFOBufferAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html">MixedModeMutator</a> (<a class="el" href="namespacetvm_1_1rel [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1BaseComputeOpNode.html">BaseComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html">SeqStmt::Flattener</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html">MixedModeVisitor</a> (<a class="el"  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExpr.html">BaseExpr</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImm.html">FloatImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ModNode.html">ModNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class=" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExprNode.html">BaseExprNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImmNode.html">FloatImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSet.html">ModularSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFunc.html">BaseFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html">FloorDivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetAnalyzer.html">ModularSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFuncNode.html">BaseFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorModNode.html">FloorModNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetNode.html">ModularSetNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorType.html">BaseTensorType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ForNode.html">ForNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Module.html">Module</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorTypeNode.html">BaseTensorTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FreeNode.html">FreeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ModuleNode.html">ModuleNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runti [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueEqual.html">BaseValueEqual</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1StringObj_1_1FromStd.html">StringObj::FromStd</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MulNode.html">MulNode</a> (<a class="el" href="namespacetvm_1_1tir.html [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueHash.html">BaseValueHash</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Function.html">Function</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html">MultiBoxPriorAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">t [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchNormAttrs.html">BatchNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FunctionBaseNode.html">FunctionBaseNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultiBoxTransformLocAttrs.html">MultiBoxTransformLocAttrs</a [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BiasAddAttrs.html">BiasAddAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionNode.html">FunctionNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_N"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class=" [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1RelayRefTypeNode.html">RelayRefTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Type.html">Type</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayout.html">BijectiveLayout</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1RepeatAttrs.html">RepeatAttrs</a> (<a class="el" href="namespacetvm_1_1rela [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayoutNode.html">BijectiveLayoutNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncType.html">FuncType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray.html">NDArray</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</ [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryConv2DAttrs.html">BinaryConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncTypeNode.html">FuncTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html">NdarraySizeAttrs</a> (<a class="el" href="namespacetvm_1_1 [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryDenseAttrs.html">BinaryDenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1FuseNode.html">FuseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NENode.html">NENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::ti [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BinaryOpNode.html">BinaryOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_G"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;G&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor.html">NodeFunctor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ResizeAttrs.html">ResizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeData.html">TypeData</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BitPackAttrs.html">BitPackAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.html">NodeFunctor&lt; R(const ObjectRef &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1Bool.html">Bool</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFunc.html">GenericFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html">NonMaximumSuppressionAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;& [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BroadcastNode.html">BroadcastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFuncNode.html">GenericFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NotNode.html">NotNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;& [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericOpMap.html">GenericOpMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_O"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;O&#160;&#160;</div></td></tr [...]
-</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ROIPoolAttrs.html">ROIPoolAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html">TypedPackedFunc</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GENode.html">GENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_S"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;S&#160;&#160; [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html">TypedPackedFunc&lt; R(Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoadNode.html">BufferLoadNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html">GetValidCountsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjAllocatorBase.html">ObjAllocatorBase</a> (<a class [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferNode.html">BufferNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GlobalPool2DAttrs.html">GlobalPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Object.html">Object</a> (<a class="el" href="namespacetvm_1_1runt [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVar.html">GlobalTypeVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectEqual.html">ObjectEqual</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtim [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVarNode.html">GlobalTypeVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectHash.html">ObjectHash</a> (<a class="el" href="namespacetvm_1_1runtime.htm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BuildConfig.html">BuildConfig</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVar.html">GlobalVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectPtr.html">ObjectPtr</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BuildConfigNode.html">BuildConfigNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarNode.html">GlobalVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#16 [...]
+</td><td rowspan="2" valign="bottom"><a name="letter_F"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;F&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfoNode.html">MemoryInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ReflectionTrait.html">ReflectionTrait</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1TupleGetItemNode.html">TupleGetItemNode</a> (<a class="el" href="namespacetvm_ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MinNode.html">MinNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ReflectionVTable.html">ReflectionVTable</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1TupleNode.html">TupleNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;& [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseAttrsNode.html">BaseAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FeatureSet.html">FeatureSet</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html">MirrorPadAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm:: [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1BaseComputeOpNode.html">BaseComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1FIFOBufferAttrs.html">FIFOBufferAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html">MixedModeMutator</a> (<a class="el" h [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExpr.html">BaseExpr</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html">SeqStmt::Flattener</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html">MixedModeVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html"> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExprNode.html">BaseExprNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImm.html">FloatImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ModNode.html">ModNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFunc.html">BaseFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImmNode.html">FloatImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSet.html">ModularSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td val [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFuncNode.html">BaseFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html">FloorDivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetAnalyzer.html">ModularSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tv [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorType.html">BaseTensorType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorModNode.html">FloorModNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetNode.html">ModularSetNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorTypeNode.html">BaseTensorTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ForNode.html">ForNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Module.html">Module</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueEqual.html">BaseValueEqual</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FreeNode.html">FreeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ModuleNode.html">ModuleNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)& [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueHash.html">BaseValueHash</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1StringObj_1_1FromStd.html">StringObj::FromStd</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MulNode.html">MulNode</a> (<a class="el" href="namespacetvm_1_1tir.html"> [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchNormAttrs.html">BatchNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Function.html">Function</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html">MultiBoxPriorAttrs</a> (<a class="el" href="n [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BiasAddAttrs.html">BiasAddAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FunctionBaseNode.html">FunctionBaseNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultiBoxTransformLocAttrs.html">MultiBoxTransformLocAttrs</a> (< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayout.html">BijectiveLayout</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionNode.html">FunctionNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_N"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="a [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1RelayRefTypeNode.html">RelayRefTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeCall.html">TypeCall</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayoutNode.html">BijectiveLayoutNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1RepeatAttrs.html">RepeatAttrs</a> (<a class="el" href="namespacetvm [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryConv2DAttrs.html">BinaryConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncType.html">FuncType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray.html">NDArray</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtim [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryDenseAttrs.html">BinaryDenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncTypeNode.html">FuncTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html">NdarraySizeAttrs</a> (<a class="el" href="namespacetvm_1_1re [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BinaryOpNode.html">BinaryOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1FuseNode.html">FuseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NENode.html">NENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#1 [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BitPackAttrs.html">BitPackAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_G"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;G&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor.html">NodeFunctor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ResizeAttrs.html">ResizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeDataNode.html">TypeDataNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1Bool.html">Bool</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.html">NodeFunctor&lt; R(const ObjectRef &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ReverseAttrs.html">ReverseAttrs</a> (<a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BroadcastNode.html">BroadcastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFunc.html">GenericFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html">NonMaximumSuppressionAttrs</a> (<a class="el" href="namespacetvm_ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFuncNode.html">GenericFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NotNode.html">NotNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</t [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericOpMap.html">GenericOpMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_O"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;O&#160;&#160;</div>< [...]
+</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ROIPoolAttrs.html">ROIPoolAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html">TypedPackedFunc&lt; R(Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoadNode.html">BufferLoadNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GENode.html">GENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_S"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;S&#16 [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1TypeFunctor.html">TypeFunctor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferNode.html">BufferNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html">GetValidCountsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjAllocatorBase.html">ObjAllocatorBase</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GlobalPool2DAttrs.html">GlobalPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Object.html">Object</a> (<a class="el" href="namespacetvm_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVar.html">GlobalTypeVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectEqual.html">ObjectEqual</a> (<a class="el" href="namespacetvm_1_1runtime.html" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVarNode.html">GlobalTypeVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectHash.html">ObjectHash</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm:: [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVar.html">GlobalVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectPtr.html">ObjectPtr</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BuildConfig.html">BuildConfig</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarNode.html">GlobalVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160; [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BuildConfigNode.html">BuildConfigNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GroupNormAttrs.html">GroupNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker.html">ObjectTypeChecker</a> (<a class="el" href="namespacetvm [...]
 <tr><td rowspan="2" valign="bottom"><a name="letter_C"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;C&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GTNode.html">GTNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker.html">ObjectTypeChecker</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectSEqualReduce_3_01T_00_01TraitName_00_01false_01_4.html">Sele [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GTNode.html">GTNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Array_3_01T_01_4_01_4.html">ObjectTypeChecker&lt; Array&lt; T &gt; &gt;</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectSHashRed [...]
 <tr><td rowspan="2" valign="bottom"><a name="letter_H"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;H&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Array_3_01T_01_4_01_4.html">ObjectTypeChecker&lt; Array&lt; T &gt; &gt;</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectSHashReduce.html">SelectSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="struc [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Call.html">Call</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Map_3_01K_00_01V_01_4_01_4.html">ObjectTypeChecker&lt; Map&lt; K, V &gt; &gt;</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectS [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html">OnDeviceAttrs</a> (<a class="el" href="namespacetvm_1_1rel [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html">SHashReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html">OneHotAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1CanonicalSimplifier.html">CanonicalSimplifier</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Op.html">Op</a> (<a class [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastAttrs.html">CastAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLContext_01_4.html">Handler&lt; DLContext &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Operation.html">Operation</a> [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastHintAttrs.html">CastHintAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html">Handler&lt; DLDataType &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1OperationNode.html" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CastNode.html">CastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1HybridOpNode.html">HybridOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html">OpImplementation</a> (<a class="el" href="namespacetvm_1_1relay.html [...]
+</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Map_3_01K_00_01V_01_4_01_4.html">ObjectTypeChecker&lt; Map&lt; K, V &gt; &gt;</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectSHashReduce_3_01T_00_01TraitName_00_01false_01_4.html">SelectSHashReduce&lt; T, TraitName, false &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::deta [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Call.html">Call</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html">OnDeviceAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectVisitAttrs.html">SelectVisitAttrs</a> (<a class="el" href="namespacetvm_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html">OneHotAttrs</a> (<a class="el" href="namespacetvm_1_1relay.h [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html">SHashReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Op.html">Op</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valig [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1CanonicalSimplifier.html">CanonicalSimplifier</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Operation.html">Ope [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastAttrs.html">CastAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLContext_01_4.html">Handler&lt; DLContext &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1OperationNode.html">Operation [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastHintAttrs.html">CastHintAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html">Handler&lt; DLDataType &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementation [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CastNode.html">CastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1HybridOpNode.html">HybridOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html">OpImplementationNode</a> (<a class="el" href="namespacetvm_1_1re [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Clause.html">Clause</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_I"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;I&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html">OpImplementationNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1Sequential.html">Sequential</a> (<a class="el" href="namespacetvm_1_1transform.html">tvm::transform</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeReporterNode.html">TypeReporterNode</a> (<a clas [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ClauseNode.html">ClauseNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpMap.html">OpMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ShapeFuncAttrs.html">ShapeFuncAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160; [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ClipAttrs.html">ClipAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Id.html">Id</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpNode.html">OpNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td val [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Closure.html">Closure</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IdNode.html">IdNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpRegistry.html">OpRegistry</a> (<a class="el" href="namespacetvm.html">tvm</ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1ClosureObj.html">ClosureObj</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1If.html">If</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html">OpSpecialization</a> (<a class="el" href="names [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1OpMap.html">OpMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ShapeFuncAttrs.html">ShapeFuncAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeVarNode.html">TypeVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ClauseNode.html">ClauseNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpNode.html">OpNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ShapeOfAttrs.html">ShapeOfAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&# [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ClipAttrs.html">ClipAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Id.html">Id</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpRegistry.html">OpRegistry</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CmpOpNode.html">CmpOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfNode.html">IfNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html">OpSpecializationNode</a> (<a class="el" href="namespacetvm_1_1rel [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducer.html">CommReducer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html">IfThenElseNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html">OpStrategy</a> (<a class="el" href="namespacetvm_1_1relay.htm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html">CommReducerNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce.html">ImplSEqualReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html">OpStrategyNode</a> (<a class="el" h [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CompilerAttrs.html">CompilerAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce_3_01T_00_01true_01_4.html">ImplSEqualReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Optional.html [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Closure.html">Closure</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IdNode.html">IdNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html">OpSpecialization</a> (<a class="el" href="nam [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1ClosureObj.html">ClosureObj</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1If.html">If</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html">OpSpecializationNode</a> (<a class="el" hre [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CmpOpNode.html">CmpOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfNode.html">IfNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html">OpStrategy</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducer.html">CommReducer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html">IfThenElseNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html">OpStrategyNode</a> (<a class="el" href="namespacetvm_1_1r [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOpNode.html">ComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce.html">ImplSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1OrNode.html">OrNode</a> (<a class="el" href="namespacetvm_1_1tir.ht [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConcatenateAttrs.html">ConcatenateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce_3_01T_00_01true_01_4.html">ImplSHashReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_P"></a><table border= [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html">CommReducerNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce.html">ImplSEqualReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Optional.html">Optional</a> (<a class="el" href="names [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CompilerAttrs.html">CompilerAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce_3_01T_00_01true_01_4.html">ImplSEqualReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1OrNode.html">OrNo [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOpNode.html">ComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce.html">ImplSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_P"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class [...]
 </td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1SliceLikeAttrs.html">SliceLikeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1Array_1_1ValueConverter.html">Array::ValueConverter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs.html">ImplVisitAttrs</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1SoftmaxAttrs.html">SoftmaxAttrs</a> (<a class="el" href="namespacet [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantNode.html">ConstantNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs_3_01T_00_01true_01_4.html">ImplVisitAttrs&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFunc.html">Pac [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBound.html">ConstIntBound</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteType.html">IncompleteType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">PackedFuncValueConverter</a> (<a class="el" href="nam [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html">ConstIntBoundAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteTypeNode.html">IncompleteTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4.html"> [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundNode.html">ConstIntBoundNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html">InitOpAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01PrimExpr_01_4.html">PackedF [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstraintContext.html">ConstraintContext</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1InplaceArrayBase.html">InplaceArrayBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Integ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1Constructor.html">Constructor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html">InstanceNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4.html">PackedFuncValue [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ConstructorNode.html">ConstructorNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1vm_1_1Instruction.html">Instruction</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html">PadAttrs</a> (<a class="el" href="namespacetvm_1_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html">ConstructorValue</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraints.html">IntConstraints</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1Pass.html">Pass</a> (<a class="el" href="namespacetv [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConstructorValueObj.html">ConstructorValueObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsNode.html">IntConstraintsNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContext.html">PassContext</a> (<a [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">NDArray::Container</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransform.html">IntConstraintsTransform</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContextNode.html"> [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConcatenateAttrs.html">ConcatenateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce_3_01T_00_01true_01_4.html">ImplSHashReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1SoftmaxAtt [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs.html">ImplVisitAttrs</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFunc.html">PackedFunc</a> (<a class="el" href="namespacetvm_ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantNode.html">ConstantNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs_3_01T_00_01true_01_4.html">ImplVisitAttrs&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConv [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBound.html">ConstIntBound</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteType.html">IncompleteType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4.html">PackedFuncValueConverter [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html">ConstIntBoundAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteTypeNode.html">IncompleteTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01PrimExpr_01_4.html">PackedFuncV [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundNode.html">ConstIntBoundNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html">InitOpAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Integer_01_4.html">P [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstraintContext.html">ConstraintContext</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1InplaceArrayBase.html">InplaceArrayBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runt [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1Constructor.html">Constructor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html">InstanceNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html">PadAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::r [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ConstructorNode.html">ConstructorNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1vm_1_1Instruction.html">Instruction</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1Pass.html">Pass</a> (<a class="el" href="namespacetvm_1_1trans [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html">ConstructorValue</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraints.html">IntConstraints</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContext.html">PassContext</a> (<a class="el" hre [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html">NDArray::ContainerBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransformNode.html">IntConstraintsTransformNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassIn [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DAttrs.html">Conv1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Integer.html">Integer</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassInfoNode.html">PassInfoNode</a> (<a class="el" href="namespacetvm_1_1transform.html">tvm::transf [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DTransposeAttrs.html">Conv1DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html">InterpreterClosure</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassNode.html">PassNode</a> (<a cla [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConstructorValueObj.html">ConstructorValueObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsNode.html">IntConstraintsNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContextNode.html">PassContextNode [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">NDArray::Container</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransform.html">IntConstraintsTransform</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassInfo.html">PassInf [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html">NDArray::ContainerBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransformNode.html">IntConstraintsTransformNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassIn [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DAttrs.html">Conv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html">InterpreterClosureObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Pattern.html">Pattern</a> (<a class="el" href="name [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DTransposeAttrs.html">Conv2DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImm.html">IntImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html">PatternConstructor</a> (<a class="el" href="namespacetvm_1_1rel [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html">Conv2DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImmNode.html">IntImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html">PatternConstructorNode</a> (<a class="el" href="names [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html">Conv2DWinogradNNPACKWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSet.html">IntSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html">Pa [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DAttrs.html">Conv1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Integer.html">Integer</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassNode.html">PassNode</a> (<a class="el" href="namespacetvm_1_1transform.html">tvm::transform</a>) [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DTransposeAttrs.html">Conv1DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html">InterpreterClosure</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Pattern.html">Pattern</a> (<a class="el [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DAttrs.html">Conv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html">InterpreterClosureObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html">PatternConstructor</a> (<a [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DTransposeAttrs.html">Conv2DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImm.html">IntImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html">PatternConstructorNode</a> (<a class="el" href="namespacetv [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html">Conv2DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImmNode.html">IntImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html">PatternFunctor</a> (<a class="el" href="namespacetvm_1_1relay [...]
 <tr><td></td><td></td><td></td><td></td><td></td></tr>
 </table>
 <div class="qindex"><a class="qindex" href="#letter_A">A</a>&#160;|&#160;<a class="qindex" href="#letter_B">B</a>&#160;|&#160;<a class="qindex" href="#letter_C">C</a>&#160;|&#160;<a class="qindex" href="#letter_D">D</a>&#160;|&#160;<a class="qindex" href="#letter_E">E</a>&#160;|&#160;<a class="qindex" href="#letter_F">F</a>&#160;|&#160;<a class="qindex" href="#letter_G">G</a>&#160;|&#160;<a class="qindex" href="#letter_H">H</a>&#160;|&#160;<a class="qindex" href="#letter_I">I</a>&#160;|& [...]
diff --git a/docs/doxygen/classtvm_1_1BaseAttrsNode.html b/docs/doxygen/classtvm_1_1BaseAttrsNode.html
index 144687e..2988097 100644
--- a/docs/doxygen/classtvm_1_1BaseAttrsNode.html
+++ b/docs/doxygen/classtvm_1_1BaseAttrsNode.html
@@ -104,7 +104,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
 <p>Inherits Object.</p>
 
-<p>Inherited by <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AdaptivePool2DAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AdaptivePool3DAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AllocStorageAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AllocTensorAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; ArangeAttrs &gt;</a>, <a c [...]
+<p>Inherited by <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AdaptivePool2DAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AdaptivePool3DAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AllocStorageAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; AllocTensorAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html">tvm::AttrsNode&lt; ArangeAttrs &gt;</a>, <a c [...]
 <div class="dynheader">
 Collaboration diagram for tvm::BaseAttrsNode:</div>
 <div class="dyncontent">
@@ -252,7 +252,7 @@ Static Public Attributes</h2></td></tr>
 </dl>
 <dl class="section note"><dt>Note</dt><dd>This function throws when the required field is not present. </dd></dl>
 
-<p>Implemented in <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532559a">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532559a">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532559a">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532 [...]
+<p>Implemented in <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532559a">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532559a">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532559a">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acfba199ef906818f35432d2e5532 [...]
 
 </div>
 </div>
@@ -314,7 +314,7 @@ template&lt;typename... Args&gt; </div>
 <p>Get the field information. </p>
 <dl class="section return"><dt>Returns</dt><dd>The fields in the <a class="el" href="classtvm_1_1Attrs.html" title="Managed reference to BaseAttrsNode. ">Attrs</a>. </dd></dl>
 
-<p>Implemented in <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6a981">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6a981">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6a981">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6 [...]
+<p>Implemented in <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6a981">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6a981">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6a981">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acefe615381b5d881870af9db7ce6 [...]
 
 </div>
 </div>
@@ -398,7 +398,7 @@ template&lt;typename... Args&gt; </div>
 </table>
 </div><div class="memdoc">
 
-<p>Reimplemented in <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a34a9c72">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a34a9c72">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a34a9c72">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a3 [...]
+<p>Reimplemented in <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a34a9c72">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a34a9c72">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a34a9c72">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#a5da687ced06b4f2dfa04b142a3 [...]
 
 </div>
 </div>
@@ -433,7 +433,7 @@ template&lt;typename... Args&gt; </div>
   </dd>
 </dl>
 
-<p>Implemented in <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da885205">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da885205">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da885205">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da88 [...]
+<p>Implemented in <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da885205">tvm::AttrsNode&lt; DerivedType &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da885205">tvm::AttrsNode&lt; Conv2DWinogradNNPACKWeightTransformAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da885205">tvm::AttrsNode&lt; ReduceAttrs &gt;</a>, <a class="el" href="classtvm_1_1AttrsNode.html#acd05137ba529ac7cd07053e3da88 [...]
 
 </div>
 </div>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferLoadNode.html b/docs/doxygen/classtvm_1_1tir_1_1BufferLoadNode.html
index ce99f6a..e467a95 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1BufferLoadNode.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferLoadNode.html
@@ -161,7 +161,7 @@ Static Public Attributes</h2></td></tr>
 </table>
 <a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
 <div class="textblock"><p>Load value from the high dimension buffer. </p>
-<div class="fragment"><div class="line">value = <a class="code" href="classtvm_1_1tir_1_1BufferLoadNode.html#ac5c69453b84222037c5d3e7a2f1d4316">buffer</a>[i, j];</div></div><!-- fragment --> <dl class="section see"><dt>See also</dt><dd><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a> </dd></dl>
+<div class="fragment"><div class="line">value = <a class="code" href="classtvm_1_1tir_1_1BufferLoadNode.html#ac5c69453b84222037c5d3e7a2f1d4316">buffer</a>[i, j];</div></div><!-- fragment --> <dl class="section see"><dt>See also</dt><dd><a class="el" href="classtvm_1_1tir_1_1BufferStore.html" title="Managed reference to BufferStoreNode. ">BufferStore</a> </dd></dl>
 </div><h2 class="groupheader">Member Function Documentation</h2>
 <a class="anchor" id="ac8a28a67efbb3d6dd4e3af5463bca078"></a>
 <div class="memitem">
diff --git a/docs/doxygen/dir_000019_000010.html b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize-members.html
similarity index 60%
copy from docs/doxygen/dir_000019_000010.html
copy to docs/doxygen/classtvm_1_1tir_1_1BufferRealize-members.html
index 43c04e0..6bfe600 100644
--- a/docs/doxygen/dir_000019_000010.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize-members.html
@@ -4,7 +4,7 @@
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
 <meta name="generator" content="Doxygen 1.8.11"/>
-<title>tvm: include/tvm/te -&gt; tir Relation</title>
+<title>tvm: Member List</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="jquery.js"></script>
 <script type="text/javascript" src="dynsections.js"></script>
@@ -39,7 +39,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="namespaces.html"><span>Namespaces</span></a></li>
-      <li><a href="annotated.html"><span>Classes</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Classes</span></a></li>
       <li><a href="files.html"><span>Files</span></a></li>
       <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
@@ -59,6 +59,14 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
       </li>
     </ul>
   </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Class&#160;List</span></a></li>
+      <li><a href="classes.html"><span>Class&#160;Index</span></a></li>
+      <li><a href="inherits.html"><span>Class&#160;Hierarchy</span></a></li>
+      <li><a href="functions.html"><span>Class&#160;Members</span></a></li>
+    </ul>
+  </div>
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
@@ -75,11 +83,21 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
 <div id="nav-path" class="navpath">
   <ul>
-<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_b4c7d8e826c599ba55146c099a14beb5.html">tvm</a></li><li class="navelem"><a class="el" href="dir_f97d855a3173728370e632aa77170e34.html">te</a></li>  </ul>
+<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a></li>  </ul>
 </div>
 </div><!-- top -->
+<div class="header">
+  <div class="headertitle">
+<div class="title">tvm::tir::BufferRealize Member List</div>  </div>
+</div><!--header-->
 <div class="contents">
-<h3>te &rarr; tir Relation</h3><table class="dirtab"><tr class="dirtab"><th class="dirtab">File in include/tvm/te</th><th class="dirtab">Includes file in include/tvm/tir</th></tr><tr class="dirtab"><td class="dirtab"><a class="el" href="autodiff_8h.html">autodiff.h</a></td><td class="dirtab"><a class="el" href="tir_2expr_8h.html">expr.h</a></td></tr><tr class="dirtab"><td class="dirtab"><a class="el" href="operation_8h.html">operation.h</a></td><td class="dirtab"><a class="el" href="buff [...]
+
+<p>This is the complete list of members for <a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">tvm::tir::BufferRealize</a>, including all inherited members.</p>
+<table class="directory">
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html#a73580bb841fd787883ecad8bf04c0056">BufferRealize</a>(Buffer buffer, Array&lt; Range &gt; bounds, PrimExpr condition, Stmt body)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">tvm::tir::BufferRealize</a></td><td class="entry"><span class="mlabel">explicit</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html#aff41aa8876a3cdafb2d760ecba8674d2">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a>(BufferRealize, Stmt, BufferRealizeNode)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">tvm::tir::BufferRealize</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Stmt.html#aaca2f7bd4110028470b82825bcbb87ac">TVM_DEFINE_OBJECT_REF_METHODS</a>(Stmt, ObjectRef, StmtNode)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">tvm::tir::Stmt</a></td><td class="entry"></td></tr>
+</table></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
 Generated by &#160;<a href="http://www.doxygen.org/index.html">
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize.html
similarity index 68%
copy from docs/doxygen/classtvm_1_1tir_1_1BufferStore.html
copy to docs/doxygen/classtvm_1_1tir_1_1BufferRealize.html
index 7288eba..7a9aba4 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize.html
@@ -4,7 +4,7 @@
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
 <meta name="generator" content="Doxygen 1.8.11"/>
-<title>tvm: tvm::tir::BufferStore Class Reference</title>
+<title>tvm: tvm::tir::BufferRealize Class Reference</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="jquery.js"></script>
 <script type="text/javascript" src="dynsections.js"></script>
@@ -83,44 +83,50 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
 <div id="nav-path" class="navpath">
   <ul>
-<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a></li>  </ul>
+<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a></li>  </ul>
 </div>
 </div><!-- top -->
 <div class="header">
   <div class="summary">
 <a href="#pub-methods">Public Member Functions</a> &#124;
-<a href="classtvm_1_1tir_1_1BufferStore-members.html">List of all members</a>  </div>
+<a href="classtvm_1_1tir_1_1BufferRealize-members.html">List of all members</a>  </div>
   <div class="headertitle">
-<div class="title">tvm::tir::BufferStore Class Reference</div>  </div>
+<div class="title">tvm::tir::BufferRealize Class Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 
+<p>Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html" title="Annotate the region where the buffer need to be read and write in the body. We only need to allocate ...">BufferRealizeNode</a>.  
+ <a href="classtvm_1_1tir_1_1BufferRealize.html#details">More...</a></p>
+
 <p><code>#include &lt;<a class="el" href="stmt_8h_source.html">stmt.h</a>&gt;</code></p>
 <div class="dynheader">
-Inheritance diagram for tvm::tir::BufferStore:</div>
+Inheritance diagram for tvm::tir::BufferRealize:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferStore__inherit__graph.svg" width="211" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferRealize__inherit__graph.svg" width="230" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <div class="dynheader">
-Collaboration diagram for tvm::tir::BufferStore:</div>
+Collaboration diagram for tvm::tir::BufferRealize:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferStore__coll__graph.svg" width="211" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferRealize__coll__graph.svg" width="230" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
 Public Member Functions</h2></td></tr>
-<tr class="memitem:af7e9dbddd9b6f3a604067d805db61156"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html#af7e9dbddd9b6f3a604067d805db61156">BufferStore</a> (<a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> buffer, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> value, <a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_ [...]
-<tr class="separator:af7e9dbddd9b6f3a604067d805db61156"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ad1941ecd1ad16134af970e7e845a676b"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html#ad1941ecd1ad16134af970e7e845a676b">TVM_DEFINE_OBJECT_REF_METHODS</a> (<a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a>, <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>, <a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNod [...]
-<tr class="separator:ad1941ecd1ad16134af970e7e845a676b"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a73580bb841fd787883ecad8bf04c0056"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html#a73580bb841fd787883ecad8bf04c0056">BufferRealize</a> (<a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> buffer, <a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt; bounds, <a class="el" href="classtv [...]
+<tr class="separator:a73580bb841fd787883ecad8bf04c0056"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aff41aa8876a3cdafb2d760ecba8674d2"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html#aff41aa8876a3cdafb2d760ecba8674d2">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a> (<a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a>, <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>, <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode. [...]
+<tr class="separator:aff41aa8876a3cdafb2d760ecba8674d2"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pub_methods_classtvm_1_1tir_1_1Stmt"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_classtvm_1_1tir_1_1Stmt')"><img src="closed.png" alt="-"/>&#160;Public Member Functions inherited from <a class="el" href="classtvm_1_1tir_1_1Stmt.html">tvm::tir::Stmt</a></td></tr>
 <tr class="memitem:aaca2f7bd4110028470b82825bcbb87ac inherit pub_methods_classtvm_1_1tir_1_1Stmt"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1Stmt.html#aaca2f7bd4110028470b82825bcbb87ac">TVM_DEFINE_OBJECT_REF_METHODS</a> (<a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>, ObjectRef, <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>)</td></tr>
 <tr class="separator:aaca2f7bd4110028470b82825bcbb87ac inherit pub_methods_classtvm_1_1tir_1_1Stmt"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
-<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
-<a class="anchor" id="af7e9dbddd9b6f3a604067d805db61156"></a>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html" title="Annotate the region where the buffer need to be read and write in the body. We only need to allocate ...">BufferRealizeNode</a>. </p>
+<dl class="section see"><dt>See also</dt><dd><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html" title="Annotate the region where the buffer need to be read and write in the body. We only need to allocate ...">BufferRealizeNode</a> </dd></dl>
+</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
+<a class="anchor" id="a73580bb841fd787883ecad8bf04c0056"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -128,7 +134,7 @@ Public Member Functions</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">tvm::tir::BufferStore::BufferStore </td>
+          <td class="memname">tvm::tir::BufferRealize::BufferRealize </td>
           <td>(</td>
           <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>&#160;</td>
           <td class="paramname"><em>buffer</em>, </td>
@@ -136,14 +142,20 @@ Public Member Functions</h2></td></tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt;&#160;</td>
+          <td class="paramname"><em>bounds</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
           <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
-          <td class="paramname"><em>value</em>, </td>
+          <td class="paramname"><em>condition</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &gt;&#160;</td>
-          <td class="paramname"><em>indices</em>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td>
+          <td class="paramname"><em>body</em>&#160;</td>
         </tr>
         <tr>
           <td></td>
@@ -161,14 +173,14 @@ Public Member Functions</h2></td></tr>
 </div>
 </div>
 <h2 class="groupheader">Member Function Documentation</h2>
-<a class="anchor" id="ad1941ecd1ad16134af970e7e845a676b"></a>
+<a class="anchor" id="aff41aa8876a3cdafb2d760ecba8674d2"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">tvm::tir::BufferStore::TVM_DEFINE_OBJECT_REF_METHODS </td>
+          <td class="memname">tvm::tir::BufferRealize::TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS </td>
           <td>(</td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a>&#160;</td>
           <td class="paramname">, </td>
         </tr>
         <tr>
@@ -180,7 +192,7 @@ Public Member Functions</h2></td></tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNode</a>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a>&#160;</td>
           <td class="paramname">&#160;</td>
         </tr>
         <tr>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode-members.html b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode-members.html
similarity index 61%
copy from docs/doxygen/classtvm_1_1tir_1_1PrefetchNode-members.html
copy to docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode-members.html
index 7e66088..4874b1c 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode-members.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode-members.html
@@ -83,30 +83,31 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
 <div id="nav-path" class="navpath">
   <ul>
-<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">PrefetchNode</a></li>  </ul>
+<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a></li>  </ul>
 </div>
 </div><!-- top -->
 <div class="header">
   <div class="headertitle">
-<div class="title">tvm::tir::PrefetchNode Member List</div>  </div>
+<div class="title">tvm::tir::BufferRealizeNode Member List</div>  </div>
 </div><!--header-->
 <div class="contents">
 
-<p>This is the complete list of members for <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a>, including all inherited members.</p>
+<p>This is the complete list of members for <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a>, including all inherited members.</p>
 <table class="directory">
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#a463ce5a124a6c222706888122bb44865">_type_has_method_sequal_reduce</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#ab7e026e32383e67e620719b025e00056">_type_has_method_shash_reduce</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a4b0e453459435cdd5dced95c419f95f8">_type_key</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a9ce8a82f03dd23e200ec6c611fddeb9c">bounds</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#aaeccff8519185fc44caa07005ef56af3">dtype</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a59428251589e635271007c8b92a051e0">func</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a106bdc39611c6e46c78b7d6920c5c51d">make</a>(FunctionRef func, int value_index, DataType dtype, Region bounds)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a40defcee76278429e8c6c3c981206d36">SEqualReduce</a>(const PrefetchNode *other, SEqualReducer equal) const </td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#ad560931be434382ad0be20bb6bb7b827">SHashReduce</a>(SHashReducer hash_reduce) const </td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#ae9ae849c643a8f701b7f20f17a8432ba">_type_key</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#ab332fa7cb914acec5456d63a698c7d34">body</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#ac111908806003589f64a8eb7b068272f">bounds</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a49068e03d737011da9d55ebf52139eac">buffer</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a140f2be6b82a3594a8f351e53178dd57">BufferRealizeNode</a>()=default</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a60124ff42b906a73b6e755d62b740ecc">BufferRealizeNode</a>(Buffer buffer, Array&lt; Range &gt; bounds, PrimExpr condition, Stmt body)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a8ab45a20cd77a3c80187355aa211a9b9">condition</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a9c1ef5aafa678f60f9eba1d41816838d">SEqualReduce</a>(const BufferRealizeNode *other, SEqualReducer equal) const </td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#adab031a0226cf61003f077e3fe043514">SHashReduce</a>(SHashReducer hash_reduce) const </td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#ab4627fb1adbea88d4fe3a33679569421">TVM_DECLARE_BASE_OBJECT_INFO</a>(StmtNode, Object)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a2ad09dce7433dbd22eb1432a3e77f474">TVM_DECLARE_FINAL_OBJECT_INFO</a>(PrefetchNode, StmtNode)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a0c2bdc2deb363f41b0362c6a9ed4296f">value_index</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a491c03fa62a96b66b7ea0c8e9095b98e">VisitAttrs</a>(AttrVisitor *v)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a8d39e15d23d3f0ffc35d5092619d7ef8">TVM_DECLARE_FINAL_OBJECT_INFO</a>(BufferRealizeNode, StmtNode)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a1e66d8e7ccb2d0ab73a44775edabb38d">VisitAttrs</a>(AttrVisitor *v)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">tvm::tir::BufferRealizeNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
 </table></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1RealizeNode.html b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode.html
similarity index 58%
copy from docs/doxygen/classtvm_1_1tir_1_1RealizeNode.html
copy to docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode.html
index 6d6a08b..1a42730 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1RealizeNode.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode.html
@@ -4,7 +4,7 @@
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
 <meta name="generator" content="Doxygen 1.8.11"/>
-<title>tvm: tvm::tir::RealizeNode Class Reference</title>
+<title>tvm: tvm::tir::BufferRealizeNode Class Reference</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="jquery.js"></script>
 <script type="text/javascript" src="dynsections.js"></script>
@@ -83,82 +83,74 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
 <div id="nav-path" class="navpath">
   <ul>
-<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html">RealizeNode</a></li>  </ul>
+<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a></li>  </ul>
 </div>
 </div><!-- top -->
 <div class="header">
   <div class="summary">
 <a href="#pub-methods">Public Member Functions</a> &#124;
-<a href="#pub-static-methods">Static Public Member Functions</a> &#124;
 <a href="#pub-attribs">Public Attributes</a> &#124;
 <a href="#pub-static-attribs">Static Public Attributes</a> &#124;
-<a href="classtvm_1_1tir_1_1RealizeNode-members.html">List of all members</a>  </div>
+<a href="classtvm_1_1tir_1_1BufferRealizeNode-members.html">List of all members</a>  </div>
   <div class="headertitle">
-<div class="title">tvm::tir::RealizeNode Class Reference</div>  </div>
+<div class="title">tvm::tir::BufferRealizeNode Class Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 
-<p>Annotate the bounds where func need to be written and read in body. We will need to allocate space for the corresponding regions.  
- <a href="classtvm_1_1tir_1_1RealizeNode.html#details">More...</a></p>
+<p>Annotate the region where the buffer need to be read and write in the body. We only need to allocate the space for the corresponding region.  
+ <a href="classtvm_1_1tir_1_1BufferRealizeNode.html#details">More...</a></p>
 
 <p><code>#include &lt;<a class="el" href="stmt_8h_source.html">stmt.h</a>&gt;</code></p>
 <div class="dynheader">
-Inheritance diagram for tvm::tir::RealizeNode:</div>
+Inheritance diagram for tvm::tir::BufferRealizeNode:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1RealizeNode__inherit__graph.svg" width="284" height="544"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferRealizeNode__inherit__graph.svg" width="284" height="530"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <div class="dynheader">
-Collaboration diagram for tvm::tir::RealizeNode:</div>
+Collaboration diagram for tvm::tir::BufferRealizeNode:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1RealizeNode__coll__graph.svg" width="1000" height="1027"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferRealizeNode__coll__graph.svg" width="904" height="851"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
 Public Member Functions</h2></td></tr>
-<tr class="memitem:a81eb59b0034a600c759d231cb23a1d13"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#a81eb59b0034a600c759d231cb23a1d13">VisitAttrs</a> (<a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> *v)</td></tr>
-<tr class="separator:a81eb59b0034a600c759d231cb23a1d13"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a7b03b82b28f358f5cf41a0a846064cd7"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#a7b03b82b28f358f5cf41a0a846064cd7">SEqualReduce</a> (const <a class="el" href="classtvm_1_1tir_1_1RealizeNode.html">RealizeNode</a> *other, <a class="el" href="classtvm_1_1SEqualReducer.html">SEqualReducer</a> equal) const </td></tr>
-<tr class="separator:a7b03b82b28f358f5cf41a0a846064cd7"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a579bb23d02358538a7036e2618758bdc"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#a579bb23d02358538a7036e2618758bdc">SHashReduce</a> (<a class="el" href="classtvm_1_1SHashReducer.html">SHashReducer</a> hash_reduce) const </td></tr>
-<tr class="separator:a579bb23d02358538a7036e2618758bdc"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ae71417374f0059376826f6ad13d60e1c"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#ae71417374f0059376826f6ad13d60e1c">TVM_DECLARE_FINAL_OBJECT_INFO</a> (<a class="el" href="classtvm_1_1tir_1_1RealizeNode.html">RealizeNode</a>, <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>)</td></tr>
-<tr class="separator:ae71417374f0059376826f6ad13d60e1c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1e66d8e7ccb2d0ab73a44775edabb38d"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a1e66d8e7ccb2d0ab73a44775edabb38d">VisitAttrs</a> (<a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> *v)</td></tr>
+<tr class="separator:a1e66d8e7ccb2d0ab73a44775edabb38d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a9c1ef5aafa678f60f9eba1d41816838d"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a9c1ef5aafa678f60f9eba1d41816838d">SEqualReduce</a> (const <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a> *other, <a class="el" href="classtvm_1_1SEqualReducer.html">SEqualReducer</a> equal) const </td></tr>
+<tr class="separator:a9c1ef5aafa678f60f9eba1d41816838d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:adab031a0226cf61003f077e3fe043514"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#adab031a0226cf61003f077e3fe043514">SHashReduce</a> (<a class="el" href="classtvm_1_1SHashReducer.html">SHashReducer</a> hash_reduce) const </td></tr>
+<tr class="separator:adab031a0226cf61003f077e3fe043514"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a140f2be6b82a3594a8f351e53178dd57"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a140f2be6b82a3594a8f351e53178dd57">BufferRealizeNode</a> ()=default</td></tr>
+<tr class="separator:a140f2be6b82a3594a8f351e53178dd57"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a60124ff42b906a73b6e755d62b740ecc"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a60124ff42b906a73b6e755d62b740ecc">BufferRealizeNode</a> (<a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a49068e03d737011da9d55ebf52139eac">buffer</a>, <a class="el" href="classtvm_1_1Array.html" [...]
+<tr class="separator:a60124ff42b906a73b6e755d62b740ecc"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a8d39e15d23d3f0ffc35d5092619d7ef8"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a8d39e15d23d3f0ffc35d5092619d7ef8">TVM_DECLARE_FINAL_OBJECT_INFO</a> (<a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a>, <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>)</td></tr>
+<tr class="separator:a8d39e15d23d3f0ffc35d5092619d7ef8"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pub_methods_classtvm_1_1tir_1_1StmtNode"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_classtvm_1_1tir_1_1StmtNode')"><img src="closed.png" alt="-"/>&#160;Public Member Functions inherited from <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td></tr>
 <tr class="memitem:ab4627fb1adbea88d4fe3a33679569421 inherit pub_methods_classtvm_1_1tir_1_1StmtNode"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#ab4627fb1adbea88d4fe3a33679569421">TVM_DECLARE_BASE_OBJECT_INFO</a> (<a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>, Object)</td></tr>
 <tr class="separator:ab4627fb1adbea88d4fe3a33679569421 inherit pub_methods_classtvm_1_1tir_1_1StmtNode"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-methods"></a>
-Static Public Member Functions</h2></td></tr>
-<tr class="memitem:aa64ed79c18690a0e76f44eb0d736074b"><td class="memItemLeft" align="right" valign="top">static <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#aa64ed79c18690a0e76f44eb0d736074b">make</a> (<a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a> <a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#ac74657f89ed01d27c42a123aa9c4e1d0">func< [...]
-<tr class="separator:aa64ed79c18690a0e76f44eb0d736074b"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
 Public Attributes</h2></td></tr>
-<tr class="memitem:ac74657f89ed01d27c42a123aa9c4e1d0"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#ac74657f89ed01d27c42a123aa9c4e1d0">func</a></td></tr>
-<tr class="memdesc:ac74657f89ed01d27c42a123aa9c4e1d0"><td class="mdescLeft">&#160;</td><td class="mdescRight">The function to be realized.  <a href="#ac74657f89ed01d27c42a123aa9c4e1d0">More...</a><br /></td></tr>
-<tr class="separator:ac74657f89ed01d27c42a123aa9c4e1d0"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ab126f401e72fe4b9b1ceee3a9dffaaef"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#ab126f401e72fe4b9b1ceee3a9dffaaef">value_index</a></td></tr>
-<tr class="memdesc:ab126f401e72fe4b9b1ceee3a9dffaaef"><td class="mdescLeft">&#160;</td><td class="mdescRight">The output value index if func's value is a tuple.  <a href="#ab126f401e72fe4b9b1ceee3a9dffaaef">More...</a><br /></td></tr>
-<tr class="separator:ab126f401e72fe4b9b1ceee3a9dffaaef"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a94cf509b9b13b5975fde6e04abf15314"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">DataType</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#a94cf509b9b13b5975fde6e04abf15314">dtype</a></td></tr>
-<tr class="memdesc:a94cf509b9b13b5975fde6e04abf15314"><td class="mdescLeft">&#160;</td><td class="mdescRight">The data type of the array.  <a href="#a94cf509b9b13b5975fde6e04abf15314">More...</a><br /></td></tr>
-<tr class="separator:a94cf509b9b13b5975fde6e04abf15314"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a5e2e8544a255e076311ff44eee46b9c7"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacetvm_1_1tir.html#a8277e2a3d81a80a4776705673df51e0a">Region</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#a5e2e8544a255e076311ff44eee46b9c7">bounds</a></td></tr>
-<tr class="memdesc:a5e2e8544a255e076311ff44eee46b9c7"><td class="mdescLeft">&#160;</td><td class="mdescRight">Bounds to be realized.  <a href="#a5e2e8544a255e076311ff44eee46b9c7">More...</a><br /></td></tr>
-<tr class="separator:a5e2e8544a255e076311ff44eee46b9c7"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a19b83a431ad0ac0d5c361b359f0bb0fa"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#a19b83a431ad0ac0d5c361b359f0bb0fa">condition</a></td></tr>
-<tr class="memdesc:a19b83a431ad0ac0d5c361b359f0bb0fa"><td class="mdescLeft">&#160;</td><td class="mdescRight">Only realize if condition holds.  <a href="#a19b83a431ad0ac0d5c361b359f0bb0fa">More...</a><br /></td></tr>
-<tr class="separator:a19b83a431ad0ac0d5c361b359f0bb0fa"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:aa8a89873f73042f8784913b7f088297e"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#aa8a89873f73042f8784913b7f088297e">body</a></td></tr>
-<tr class="memdesc:aa8a89873f73042f8784913b7f088297e"><td class="mdescLeft">&#160;</td><td class="mdescRight">The body of realization.  <a href="#aa8a89873f73042f8784913b7f088297e">More...</a><br /></td></tr>
-<tr class="separator:aa8a89873f73042f8784913b7f088297e"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a49068e03d737011da9d55ebf52139eac"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a49068e03d737011da9d55ebf52139eac">buffer</a></td></tr>
+<tr class="memdesc:a49068e03d737011da9d55ebf52139eac"><td class="mdescLeft">&#160;</td><td class="mdescRight">The buffer variable.  <a href="#a49068e03d737011da9d55ebf52139eac">More...</a><br /></td></tr>
+<tr class="separator:a49068e03d737011da9d55ebf52139eac"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ac111908806003589f64a8eb7b068272f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#ac111908806003589f64a8eb7b068272f">bounds</a></td></tr>
+<tr class="memdesc:ac111908806003589f64a8eb7b068272f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Bounds to be realized.  <a href="#ac111908806003589f64a8eb7b068272f">More...</a><br /></td></tr>
+<tr class="separator:ac111908806003589f64a8eb7b068272f"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a8ab45a20cd77a3c80187355aa211a9b9"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#a8ab45a20cd77a3c80187355aa211a9b9">condition</a></td></tr>
+<tr class="memdesc:a8ab45a20cd77a3c80187355aa211a9b9"><td class="mdescLeft">&#160;</td><td class="mdescRight">Only realize if condition holds.  <a href="#a8ab45a20cd77a3c80187355aa211a9b9">More...</a><br /></td></tr>
+<tr class="separator:a8ab45a20cd77a3c80187355aa211a9b9"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ab332fa7cb914acec5456d63a698c7d34"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#ab332fa7cb914acec5456d63a698c7d34">body</a></td></tr>
+<tr class="memdesc:ab332fa7cb914acec5456d63a698c7d34"><td class="mdescLeft">&#160;</td><td class="mdescRight">The body of realization.  <a href="#ab332fa7cb914acec5456d63a698c7d34">More...</a><br /></td></tr>
+<tr class="separator:ab332fa7cb914acec5456d63a698c7d34"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-attribs"></a>
 Static Public Attributes</h2></td></tr>
-<tr class="memitem:ad65cce50b516f20690ae7092e6b35c69"><td class="memItemLeft" align="right" valign="top">static constexpr const char *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html#ad65cce50b516f20690ae7092e6b35c69">_type_key</a> = &quot;Realize&quot;</td></tr>
-<tr class="separator:ad65cce50b516f20690ae7092e6b35c69"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ae9ae849c643a8f701b7f20f17a8432ba"><td class="memItemLeft" align="right" valign="top">static constexpr const char *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html#ae9ae849c643a8f701b7f20f17a8432ba">_type_key</a> = &quot;BufferRealize&quot;</td></tr>
+<tr class="separator:ae9ae849c643a8f701b7f20f17a8432ba"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pub_static_attribs_classtvm_1_1tir_1_1StmtNode"><td colspan="2" onclick="javascript:toggleInherit('pub_static_attribs_classtvm_1_1tir_1_1StmtNode')"><img src="closed.png" alt="-"/>&#160;Static Public Attributes inherited from <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td></tr>
 <tr class="memitem:af5d8bc86681742ef0bca9625751baccc inherit pub_static_attribs_classtvm_1_1tir_1_1StmtNode"><td class="memItemLeft" align="right" valign="top">static constexpr const char *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#af5d8bc86681742ef0bca9625751baccc">_type_key</a> = &quot;Stmt&quot;</td></tr>
 <tr class="separator:af5d8bc86681742ef0bca9625751baccc inherit pub_static_attribs_classtvm_1_1tir_1_1StmtNode"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -168,9 +160,11 @@ Static Public Attributes</h2></td></tr>
 <tr class="separator:ab7e026e32383e67e620719b025e00056 inherit pub_static_attribs_classtvm_1_1tir_1_1StmtNode"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
-<div class="textblock"><p>Annotate the bounds where func need to be written and read in body. We will need to allocate space for the corresponding regions. </p>
-</div><h2 class="groupheader">Member Function Documentation</h2>
-<a class="anchor" id="aa64ed79c18690a0e76f44eb0d736074b"></a>
+<div class="textblock"><p>Annotate the region where the buffer need to be read and write in the body. We only need to allocate the space for the corresponding region. </p>
+<dl class="section note"><dt>Note</dt><dd>There should be at most one <a class="el" href="classtvm_1_1tir_1_1BufferRealize.html" title="Managed reference to BufferRealizeNode. ">BufferRealize</a> for each buffer. <a class="el" href="classtvm_1_1tir_1_1BufferRealize.html" title="Managed reference to BufferRealizeNode. ">BufferRealize</a> is not necessary for external buffers, since they are assumed to be fully allocated.</dd></dl>
+<dl class="section see"><dt>See also</dt><dd><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a>, <a class="el" href="classtvm_1_1tir_1_1BufferStore.html" title="Managed reference to BufferStoreNode. ">BufferStore</a> </dd></dl>
+</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
+<a class="anchor" id="a140f2be6b82a3594a8f351e53178dd57"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -178,27 +172,38 @@ Static Public Attributes</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">static <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a> tvm::tir::RealizeNode::make </td>
+          <td class="memname">tvm::tir::BufferRealizeNode::BufferRealizeNode </td>
           <td>(</td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a>&#160;</td>
-          <td class="paramname"><em>func</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
+          <td class="paramname"></td><td>)</td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
-          <td class="paramname"><em>value_index</em>, </td>
         </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">default</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+</div>
+</div>
+<a class="anchor" id="a60124ff42b906a73b6e755d62b740ecc"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
         <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype"><a class="el" href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">DataType</a>&#160;</td>
-          <td class="paramname"><em>dtype</em>, </td>
+          <td class="memname">tvm::tir::BufferRealizeNode::BufferRealizeNode </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>&#160;</td>
+          <td class="paramname"><em>buffer</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="namespacetvm_1_1tir.html#a8277e2a3d81a80a4776705673df51e0a">Region</a>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt;&#160;</td>
           <td class="paramname"><em>bounds</em>, </td>
         </tr>
         <tr>
@@ -221,14 +226,15 @@ Static Public Attributes</h2></td></tr>
       </table>
   </td>
   <td class="mlabels-right">
-<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
   </tr>
 </table>
 </div><div class="memdoc">
 
 </div>
 </div>
-<a class="anchor" id="a7b03b82b28f358f5cf41a0a846064cd7"></a>
+<h2 class="groupheader">Member Function Documentation</h2>
+<a class="anchor" id="a9c1ef5aafa678f60f9eba1d41816838d"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -236,9 +242,9 @@ Static Public Attributes</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">bool tvm::tir::RealizeNode::SEqualReduce </td>
+          <td class="memname">bool tvm::tir::BufferRealizeNode::SEqualReduce </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1tir_1_1RealizeNode.html">RealizeNode</a> *&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a> *&#160;</td>
           <td class="paramname"><em>other</em>, </td>
         </tr>
         <tr>
@@ -262,7 +268,7 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="a579bb23d02358538a7036e2618758bdc"></a>
+<a class="anchor" id="adab031a0226cf61003f077e3fe043514"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -270,7 +276,7 @@ Static Public Attributes</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">void tvm::tir::RealizeNode::SHashReduce </td>
+          <td class="memname">void tvm::tir::BufferRealizeNode::SHashReduce </td>
           <td>(</td>
           <td class="paramtype"><a class="el" href="classtvm_1_1SHashReducer.html">SHashReducer</a>&#160;</td>
           <td class="paramname"><em>hash_reduce</em></td><td>)</td>
@@ -286,14 +292,14 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="ae71417374f0059376826f6ad13d60e1c"></a>
+<a class="anchor" id="a8d39e15d23d3f0ffc35d5092619d7ef8"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">tvm::tir::RealizeNode::TVM_DECLARE_FINAL_OBJECT_INFO </td>
+          <td class="memname">tvm::tir::BufferRealizeNode::TVM_DECLARE_FINAL_OBJECT_INFO </td>
           <td>(</td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1RealizeNode.html">RealizeNode</a>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a>&#160;</td>
           <td class="paramname">, </td>
         </tr>
         <tr>
@@ -312,7 +318,7 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="a81eb59b0034a600c759d231cb23a1d13"></a>
+<a class="anchor" id="a1e66d8e7ccb2d0ab73a44775edabb38d"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -320,7 +326,7 @@ Static Public Attributes</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">void tvm::tir::RealizeNode::VisitAttrs </td>
+          <td class="memname">void tvm::tir::BufferRealizeNode::VisitAttrs </td>
           <td>(</td>
           <td class="paramtype"><a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> *&#160;</td>
           <td class="paramname"><em>v</em></td><td>)</td>
@@ -337,7 +343,7 @@ Static Public Attributes</h2></td></tr>
 </div>
 </div>
 <h2 class="groupheader">Member Data Documentation</h2>
-<a class="anchor" id="ad65cce50b516f20690ae7092e6b35c69"></a>
+<a class="anchor" id="ae9ae849c643a8f701b7f20f17a8432ba"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -345,7 +351,7 @@ Static Public Attributes</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">constexpr const char* tvm::tir::RealizeNode::_type_key = &quot;Realize&quot;</td>
+          <td class="memname">constexpr const char* tvm::tir::BufferRealizeNode::_type_key = &quot;BufferRealize&quot;</td>
         </tr>
       </table>
   </td>
@@ -357,12 +363,12 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="aa8a89873f73042f8784913b7f088297e"></a>
+<a class="anchor" id="ab332fa7cb914acec5456d63a698c7d34"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a> tvm::tir::RealizeNode::body</td>
+          <td class="memname"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a> tvm::tir::BufferRealizeNode::body</td>
         </tr>
       </table>
 </div><div class="memdoc">
@@ -371,12 +377,12 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="a5e2e8544a255e076311ff44eee46b9c7"></a>
+<a class="anchor" id="ac111908806003589f64a8eb7b068272f"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="namespacetvm_1_1tir.html#a8277e2a3d81a80a4776705673df51e0a">Region</a> tvm::tir::RealizeNode::bounds</td>
+          <td class="memname"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt;<a class="el" href="classtvm_1_1Range.html">Range</a>&gt; tvm::tir::BufferRealizeNode::bounds</td>
         </tr>
       </table>
 </div><div class="memdoc">
@@ -385,59 +391,31 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="a19b83a431ad0ac0d5c361b359f0bb0fa"></a>
+<a class="anchor" id="a49068e03d737011da9d55ebf52139eac"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::tir::RealizeNode::condition</td>
+          <td class="memname"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> tvm::tir::BufferRealizeNode::buffer</td>
         </tr>
       </table>
 </div><div class="memdoc">
 
-<p>Only realize if condition holds. </p>
-
-</div>
-</div>
-<a class="anchor" id="a94cf509b9b13b5975fde6e04abf15314"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname"><a class="el" href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">DataType</a> tvm::tir::RealizeNode::dtype</td>
-        </tr>
-      </table>
-</div><div class="memdoc">
-
-<p>The data type of the array. </p>
+<p>The buffer variable. </p>
 
 </div>
 </div>
-<a class="anchor" id="ac74657f89ed01d27c42a123aa9c4e1d0"></a>
+<a class="anchor" id="a8ab45a20cd77a3c80187355aa211a9b9"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a> tvm::tir::RealizeNode::func</td>
+          <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::tir::BufferRealizeNode::condition</td>
         </tr>
       </table>
 </div><div class="memdoc">
 
-<p>The function to be realized. </p>
-
-</div>
-</div>
-<a class="anchor" id="ab126f401e72fe4b9b1ceee3a9dffaaef"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int tvm::tir::RealizeNode::value_index</td>
-        </tr>
-      </table>
-</div><div class="memdoc">
-
-<p>The output value index if func's value is a tuple. </p>
+<p>Only realize if condition holds. </p>
 
 </div>
 </div>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode__coll__graph.svg b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode__coll__graph.svg
new file mode 100644
index 0000000..ccf44d6
--- /dev/null
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode__coll__graph.svg
@@ -0,0 +1,203 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.38.0 (20140413.2041)
+ -->
+<!-- Title: tvm::tir::BufferRealizeNode Pages: 1 -->
+<svg width="678pt" height="638pt"
+ viewBox="0.00 0.00 678.00 638.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 634)">
+<title>tvm::tir::BufferRealizeNode</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-634 674,-634 674,4 -4,4"/>
+<!-- Node1 -->
+<g id="node1" class="node"><title>Node1</title>
+<polygon fill="#bfbfbf" stroke="black" points="277.5,-0.5 277.5,-112.5 482.5,-112.5 482.5,-0.5 277.5,-0.5"/>
+<text text-anchor="middle" x="380" y="-100.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::BufferRealizeNode</text>
+<polyline fill="none" stroke="black" points="277.5,-93.5 482.5,-93.5 "/>
+<text text-anchor="start" x="285.5" y="-81.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
+<polyline fill="none" stroke="black" points="277.5,-74.5 482.5,-74.5 "/>
+<text text-anchor="start" x="285.5" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ VisitAttrs()</text>
+<text text-anchor="start" x="285.5" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SEqualReduce()</text>
+<text text-anchor="start" x="285.5" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SHashReduce()</text>
+<text text-anchor="start" x="285.5" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ BufferRealizeNode()</text>
+<text text-anchor="start" x="285.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ BufferRealizeNode()</text>
+<text text-anchor="start" x="285.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_FINAL_OBJECT_INFO()</text>
+</g>
+<!-- Node2 -->
+<g id="node2" class="node"><title>Node2</title>
+<g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1StmtNode.html" target="_top" xlink:title="Base node of all statements. ">
+<polygon fill="white" stroke="black" points="0,-160.5 0,-261.5 204,-261.5 204,-160.5 0,-160.5"/>
+<text text-anchor="middle" x="102" y="-249.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::StmtNode</text>
+<polyline fill="none" stroke="black" points="0,-242.5 204,-242.5 "/>
+<text text-anchor="start" x="8" y="-230.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
+<text text-anchor="start" x="8" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_sequal</text>
+<text text-anchor="start" x="8" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
+<text text-anchor="start" x="8" y="-197.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_shash</text>
+<text text-anchor="start" x="8" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
+<polyline fill="none" stroke="black" points="0,-179.5 204,-179.5 "/>
+<text text-anchor="start" x="8" y="-167.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_BASE_OBJECT_INFO()</text>
+</a>
+</g>
+</g>
+<!-- Node2&#45;&gt;Node1 -->
+<g id="edge1" class="edge"><title>Node2&#45;&gt;Node1</title>
+<path fill="none" stroke="midnightblue" d="M185.283,-154.996C198.733,-146.679 212.632,-138.412 226,-131 242.323,-121.949 259.938,-112.902 277.221,-104.405"/>
+<polygon fill="none" stroke="midnightblue" points="183.154,-152.199 176.519,-160.46 186.857,-158.139 183.154,-152.199"/>
+</g>
+<!-- Node3 -->
+<g id="node3" class="node"><title>Node3</title>
+<polygon fill="white" stroke="#bfbfbf" points="78.5,-472 78.5,-529 125.5,-529 125.5,-472 78.5,-472"/>
+<text text-anchor="middle" x="102" y="-517" font-family="Helvetica,sans-Serif" font-size="10.00">Object</text>
+<polyline fill="none" stroke="#bfbfbf" points="78.5,-510 125.5,-510 "/>
+<text text-anchor="middle" x="102" y="-498" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="#bfbfbf" points="78.5,-491 125.5,-491 "/>
+<text text-anchor="middle" x="102" y="-479" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+</g>
+<!-- Node3&#45;&gt;Node2 -->
+<g id="edge2" class="edge"><title>Node3&#45;&gt;Node2</title>
+<path fill="none" stroke="midnightblue" d="M102,-461.62C102,-410.286 102,-318.533 102,-261.711"/>
+<polygon fill="none" stroke="midnightblue" points="98.5001,-461.933 102,-471.933 105.5,-461.933 98.5001,-461.933"/>
+</g>
+<!-- Node4 -->
+<g id="node4" class="node"><title>Node4</title>
+<g id="a_node4"><a xlink:href="classtvm_1_1Array.html" target="_top" xlink:title="{tvm::Array\&lt; tvm::Range \&gt;\n||+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ operator=()\l+ operator=()\land 13 more...\l}">
+<polygon fill="white" stroke="black" points="180,-280.5 180,-447.5 324,-447.5 324,-280.5 180,-280.5"/>
+<text text-anchor="middle" x="252" y="-435.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::Array&lt; tvm::Range &gt;</text>
+<polyline fill="none" stroke="black" points="180,-428.5 324,-428.5 "/>
+<text text-anchor="middle" x="252" y="-416.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="180,-409.5 324,-409.5 "/>
+<text text-anchor="start" x="188" y="-397.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-386.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-375.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-364.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-331.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="188" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ operator=()</text>
+<text text-anchor="start" x="188" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ operator=()</text>
+<text text-anchor="start" x="188" y="-287.5" font-family="Helvetica,sans-Serif" font-size="10.00">and 13 more...</text>
+</a>
+</g>
+</g>
+<!-- Node4&#45;&gt;Node1 -->
+<g id="edge3" class="edge"><title>Node4&#45;&gt;Node1</title>
+<path fill="none" stroke="#404040" d="M234.22,-280.245C230.027,-241.881 231.09,-196.755 249,-160 256.006,-145.623 266.111,-132.669 277.648,-121.196"/>
+<polygon fill="none" stroke="#404040" points="277.956,-120.908 279.607,-113.888 286.721,-112.712 285.071,-119.731 277.956,-120.908"/>
+<text text-anchor="middle" x="272.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00"> +bounds</text>
+</g>
+<!-- Node5 -->
+<g id="node5" class="node"><title>Node5</title>
+<polygon fill="white" stroke="#bfbfbf" points="404,-572.5 404,-629.5 466,-629.5 466,-572.5 404,-572.5"/>
+<text text-anchor="middle" x="435" y="-617.5" font-family="Helvetica,sans-Serif" font-size="10.00">ObjectRef</text>
+<polyline fill="none" stroke="#bfbfbf" points="404,-610.5 466,-610.5 "/>
+<text text-anchor="middle" x="435" y="-598.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="#bfbfbf" points="404,-591.5 466,-591.5 "/>
+<text text-anchor="middle" x="435" y="-579.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+</g>
+<!-- Node5&#45;&gt;Node4 -->
+<g id="edge4" class="edge"><title>Node5&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M393.607,-593.969C357.868,-586.488 307.736,-570.119 280,-535 260.719,-510.588 252.483,-477.954 249.5,-447.807"/>
+<polygon fill="none" stroke="midnightblue" points="393.252,-597.465 403.735,-595.933 394.585,-590.593 393.252,-597.465"/>
+</g>
+<!-- Node7 -->
+<g id="node7" class="node"><title>Node7</title>
+<g id="a_node7"><a xlink:href="classtvm_1_1BaseExpr.html" target="_top" xlink:title="Managed reference to BaseExprNode. ">
+<polygon fill="white" stroke="black" points="289,-466.5 289,-534.5 439,-534.5 439,-466.5 289,-466.5"/>
+<text text-anchor="middle" x="364" y="-522.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::BaseExpr</text>
+<polyline fill="none" stroke="black" points="289,-515.5 439,-515.5 "/>
+<text text-anchor="middle" x="364" y="-503.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="289,-496.5 439,-496.5 "/>
+<text text-anchor="start" x="297" y="-484.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_OBJECT_REF</text>
+<text text-anchor="start" x="297" y="-473.5" font-family="Helvetica,sans-Serif" font-size="10.00">_METHODS()</text>
+</a>
+</g>
+</g>
+<!-- Node5&#45;&gt;Node7 -->
+<g id="edge7" class="edge"><title>Node5&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M409.095,-564.062C402.149,-554.425 394.672,-544.053 387.843,-534.578"/>
+<polygon fill="none" stroke="midnightblue" points="406.347,-566.234 415.034,-572.3 412.025,-562.141 406.347,-566.234"/>
+</g>
+<!-- Node8 -->
+<g id="node8" class="node"><title>Node8</title>
+<g id="a_node8"><a xlink:href="classtvm_1_1tir_1_1Buffer.html" target="_top" xlink:title="Buffer is a symbolic n&#45;darray structure. It is a composition of primitive symbolic types...">
+<polygon fill="white" stroke="black" points="404.5,-297 404.5,-431 515.5,-431 515.5,-297 404.5,-297"/>
+<text text-anchor="middle" x="460" y="-419" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::Buffer</text>
+<polyline fill="none" stroke="black" points="404.5,-412 515.5,-412 "/>
+<text text-anchor="middle" x="460" y="-400" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="404.5,-393 515.5,-393 "/>
+<text text-anchor="start" x="412.5" y="-381" font-family="Helvetica,sans-Serif" font-size="10.00">+ Buffer()</text>
+<text text-anchor="start" x="412.5" y="-370" font-family="Helvetica,sans-Serif" font-size="10.00">+ Buffer()</text>
+<text text-anchor="start" x="412.5" y="-359" font-family="Helvetica,sans-Serif" font-size="10.00">+ MakeStrideView()</text>
+<text text-anchor="start" x="412.5" y="-348" font-family="Helvetica,sans-Serif" font-size="10.00">+ MakeSlice()</text>
+<text text-anchor="start" x="412.5" y="-337" font-family="Helvetica,sans-Serif" font-size="10.00">+ access_ptr()</text>
+<text text-anchor="start" x="412.5" y="-326" font-family="Helvetica,sans-Serif" font-size="10.00">+ vload()</text>
+<text text-anchor="start" x="412.5" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00">+ vstore()</text>
+<text text-anchor="start" x="412.5" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00">+ operator&#45;&gt;()</text>
+</a>
+</g>
+</g>
+<!-- Node5&#45;&gt;Node8 -->
+<g id="edge9" class="edge"><title>Node5&#45;&gt;Node8</title>
+<path fill="none" stroke="midnightblue" d="M443.345,-562.489C445.106,-553.52 446.794,-543.949 448,-535 452.583,-500.986 455.477,-462.892 457.271,-431.4"/>
+<polygon fill="none" stroke="midnightblue" points="439.914,-561.798 441.347,-572.295 446.773,-563.195 439.914,-561.798"/>
+</g>
+<!-- Node9 -->
+<g id="node9" class="node"><title>Node9</title>
+<g id="a_node9"><a xlink:href="classtvm_1_1tir_1_1Stmt.html" target="_top" xlink:title="Container of all statements. ">
+<polygon fill="white" stroke="black" points="520,-330 520,-398 670,-398 670,-330 520,-330"/>
+<text text-anchor="middle" x="595" y="-386" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::Stmt</text>
+<polyline fill="none" stroke="black" points="520,-379 670,-379 "/>
+<text text-anchor="middle" x="595" y="-367" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="520,-360 670,-360 "/>
+<text text-anchor="start" x="528" y="-348" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_OBJECT_REF</text>
+<text text-anchor="start" x="528" y="-337" font-family="Helvetica,sans-Serif" font-size="10.00">_METHODS()</text>
+</a>
+</g>
+</g>
+<!-- Node5&#45;&gt;Node9 -->
+<g id="edge11" class="edge"><title>Node5&#45;&gt;Node9</title>
+<path fill="none" stroke="midnightblue" d="M459.656,-563.787C490.443,-518.569 542.673,-441.856 572.444,-398.129"/>
+<polygon fill="none" stroke="midnightblue" points="456.717,-561.884 453.982,-572.12 462.503,-565.823 456.717,-561.884"/>
+</g>
+<!-- Node6 -->
+<g id="node6" class="node"><title>Node6</title>
+<g id="a_node6"><a xlink:href="classtvm_1_1PrimExpr.html" target="_top" xlink:title="Reference to PrimExprNode. ">
+<polygon fill="white" stroke="black" points="305,-160.5 305,-261.5 455,-261.5 455,-160.5 305,-160.5"/>
+<text text-anchor="middle" x="380" y="-249.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::PrimExpr</text>
+<polyline fill="none" stroke="black" points="305,-242.5 455,-242.5 "/>
+<text text-anchor="middle" x="380" y="-230.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="305,-223.5 455,-223.5 "/>
+<text text-anchor="start" x="313" y="-211.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ PrimExpr()</text>
+<text text-anchor="start" x="313" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ PrimExpr()</text>
+<text text-anchor="start" x="313" y="-189.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ dtype()</text>
+<text text-anchor="start" x="313" y="-178.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_OBJECT_REF</text>
+<text text-anchor="start" x="313" y="-167.5" font-family="Helvetica,sans-Serif" font-size="10.00">_METHODS()</text>
+</a>
+</g>
+</g>
+<!-- Node6&#45;&gt;Node1 -->
+<g id="edge5" class="edge"><title>Node6&#45;&gt;Node1</title>
+<path fill="none" stroke="#404040" d="M380,-160.488C380,-149.127 380,-136.86 380,-124.888"/>
+<polygon fill="none" stroke="#404040" points="380,-124.569 376,-118.57 380,-112.569 384,-118.569 380,-124.569"/>
+<text text-anchor="middle" x="407.5" y="-134" font-family="Helvetica,sans-Serif" font-size="10.00"> +condition</text>
+</g>
+<!-- Node7&#45;&gt;Node6 -->
+<g id="edge6" class="edge"><title>Node7&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M366.423,-455.963C369.331,-403.711 374.195,-316.304 377.244,-261.526"/>
+<polygon fill="none" stroke="midnightblue" points="362.912,-456.06 365.851,-466.239 369.901,-456.449 362.912,-456.06"/>
+</g>
+<!-- Node8&#45;&gt;Node1 -->
+<g id="edge8" class="edge"><title>Node8&#45;&gt;Node1</title>
+<path fill="none" stroke="#404040" d="M472.987,-296.633C478.119,-256.019 479.601,-203.649 464,-160 459.329,-146.93 452.194,-134.328 444.017,-122.698"/>
+<polygon fill="none" stroke="#404040" points="443.768,-122.365 436.975,-119.943 436.598,-112.742 443.39,-115.163 443.768,-122.365"/>
+<text text-anchor="middle" x="497" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00"> +buffer</text>
+</g>
+<!-- Node9&#45;&gt;Node1 -->
+<g id="edge10" class="edge"><title>Node9&#45;&gt;Node1</title>
+<path fill="none" stroke="#404040" d="M589.179,-329.903C580.354,-287.172 560.287,-212.123 521,-160 510.092,-145.528 496.572,-132.218 482.258,-120.318"/>
+<polygon fill="none" stroke="#404040" points="482.121,-120.208 474.938,-119.577 472.758,-112.703 479.941,-113.334 482.121,-120.208"/>
+<text text-anchor="middle" x="588.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00"> +body</text>
+</g>
+</g>
+</svg>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode__inherit__graph.svg b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode__inherit__graph.svg
similarity index 50%
copy from docs/doxygen/classtvm_1_1tir_1_1PrefetchNode__inherit__graph.svg
copy to docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode__inherit__graph.svg
index cb7d187..83784cd 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode__inherit__graph.svg
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealizeNode__inherit__graph.svg
@@ -3,63 +3,64 @@
  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <!-- Generated by graphviz version 2.38.0 (20140413.2041)
  -->
-<!-- Title: tvm::tir::PrefetchNode Pages: 1 -->
-<svg width="213pt" height="386pt"
- viewBox="0.00 0.00 213.00 386.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 382)">
-<title>tvm::tir::PrefetchNode</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-382 209,-382 209,4 -4,4"/>
+<!-- Title: tvm::tir::BufferRealizeNode Pages: 1 -->
+<svg width="213pt" height="397pt"
+ viewBox="0.00 0.00 213.00 397.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 393)">
+<title>tvm::tir::BufferRealizeNode</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-393 209,-393 209,4 -4,4"/>
 <!-- Node1 -->
 <g id="node1" class="node"><title>Node1</title>
-<polygon fill="#bfbfbf" stroke="black" points="-1.42109e-14,-0.5 -1.42109e-14,-145.5 205,-145.5 205,-0.5 -1.42109e-14,-0.5"/>
-<text text-anchor="middle" x="102.5" y="-133.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::PrefetchNode</text>
-<polyline fill="none" stroke="black" points="-1.42109e-14,-126.5 205,-126.5 "/>
-<text text-anchor="start" x="8" y="-114.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ func</text>
-<text text-anchor="start" x="8" y="-103.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ value_index</text>
-<text text-anchor="start" x="8" y="-92.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ dtype</text>
-<text text-anchor="start" x="8" y="-81.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ bounds</text>
-<text text-anchor="start" x="8" y="-70.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
-<polyline fill="none" stroke="black" points="-1.42109e-14,-63.5 205,-63.5 "/>
-<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ VisitAttrs()</text>
-<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SEqualReduce()</text>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SHashReduce()</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_FINAL_OBJECT_INFO()</text>
-<text text-anchor="start" x="8" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ make()</text>
+<polygon fill="#bfbfbf" stroke="black" points="-1.42109e-14,-0.5 -1.42109e-14,-156.5 205,-156.5 205,-0.5 -1.42109e-14,-0.5"/>
+<text text-anchor="middle" x="102.5" y="-144.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::BufferRealizeNode</text>
+<polyline fill="none" stroke="black" points="-1.42109e-14,-137.5 205,-137.5 "/>
+<text text-anchor="start" x="8" y="-125.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ buffer</text>
+<text text-anchor="start" x="8" y="-114.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ bounds</text>
+<text text-anchor="start" x="8" y="-103.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ condition</text>
+<text text-anchor="start" x="8" y="-92.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ body</text>
+<text text-anchor="start" x="8" y="-81.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
+<polyline fill="none" stroke="black" points="-1.42109e-14,-74.5 205,-74.5 "/>
+<text text-anchor="start" x="8" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ VisitAttrs()</text>
+<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SEqualReduce()</text>
+<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SHashReduce()</text>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ BufferRealizeNode()</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ BufferRealizeNode()</text>
+<text text-anchor="start" x="8" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_FINAL_OBJECT_INFO()</text>
 </g>
 <!-- Node2 -->
 <g id="node2" class="node"><title>Node2</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1StmtNode.html" target="_top" xlink:title="Base node of all statements. ">
-<polygon fill="white" stroke="black" points="0.5,-182.5 0.5,-283.5 204.5,-283.5 204.5,-182.5 0.5,-182.5"/>
-<text text-anchor="middle" x="102.5" y="-271.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::StmtNode</text>
-<polyline fill="none" stroke="black" points="0.5,-264.5 204.5,-264.5 "/>
-<text text-anchor="start" x="8.5" y="-252.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
-<text text-anchor="start" x="8.5" y="-241.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_sequal</text>
-<text text-anchor="start" x="8.5" y="-230.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
-<text text-anchor="start" x="8.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_shash</text>
-<text text-anchor="start" x="8.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
-<polyline fill="none" stroke="black" points="0.5,-201.5 204.5,-201.5 "/>
-<text text-anchor="start" x="8.5" y="-189.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_BASE_OBJECT_INFO()</text>
+<polygon fill="white" stroke="black" points="0.5,-193.5 0.5,-294.5 204.5,-294.5 204.5,-193.5 0.5,-193.5"/>
+<text text-anchor="middle" x="102.5" y="-282.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::StmtNode</text>
+<polyline fill="none" stroke="black" points="0.5,-275.5 204.5,-275.5 "/>
+<text text-anchor="start" x="8.5" y="-263.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
+<text text-anchor="start" x="8.5" y="-252.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_sequal</text>
+<text text-anchor="start" x="8.5" y="-241.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
+<text text-anchor="start" x="8.5" y="-230.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_shash</text>
+<text text-anchor="start" x="8.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
+<polyline fill="none" stroke="black" points="0.5,-212.5 204.5,-212.5 "/>
+<text text-anchor="start" x="8.5" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_BASE_OBJECT_INFO()</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node1 -->
 <g id="edge1" class="edge"><title>Node2&#45;&gt;Node1</title>
-<path fill="none" stroke="midnightblue" d="M102.5,-172.381C102.5,-163.656 102.5,-154.595 102.5,-145.66"/>
-<polygon fill="none" stroke="midnightblue" points="99.0001,-172.441 102.5,-182.441 106,-172.441 99.0001,-172.441"/>
+<path fill="none" stroke="midnightblue" d="M102.5,-182.815C102.5,-174.259 102.5,-165.363 102.5,-156.541"/>
+<polygon fill="none" stroke="midnightblue" points="99.0001,-183.056 102.5,-193.056 106,-183.056 99.0001,-183.056"/>
 </g>
 <!-- Node3 -->
 <g id="node3" class="node"><title>Node3</title>
-<polygon fill="white" stroke="#bfbfbf" points="79,-320.5 79,-377.5 126,-377.5 126,-320.5 79,-320.5"/>
-<text text-anchor="middle" x="102.5" y="-365.5" font-family="Helvetica,sans-Serif" font-size="10.00">Object</text>
-<polyline fill="none" stroke="#bfbfbf" points="79,-358.5 126,-358.5 "/>
-<text text-anchor="middle" x="102.5" y="-346.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
-<polyline fill="none" stroke="#bfbfbf" points="79,-339.5 126,-339.5 "/>
-<text text-anchor="middle" x="102.5" y="-327.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polygon fill="white" stroke="#bfbfbf" points="79,-331.5 79,-388.5 126,-388.5 126,-331.5 79,-331.5"/>
+<text text-anchor="middle" x="102.5" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00">Object</text>
+<polyline fill="none" stroke="#bfbfbf" points="79,-369.5 126,-369.5 "/>
+<text text-anchor="middle" x="102.5" y="-357.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="#bfbfbf" points="79,-350.5 126,-350.5 "/>
+<text text-anchor="middle" x="102.5" y="-338.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
 </g>
 <!-- Node3&#45;&gt;Node2 -->
 <g id="edge2" class="edge"><title>Node3&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M102.5,-310.354C102.5,-301.796 102.5,-292.578 102.5,-283.608"/>
-<polygon fill="none" stroke="midnightblue" points="99.0001,-310.404 102.5,-320.404 106,-310.404 99.0001,-310.404"/>
+<path fill="none" stroke="midnightblue" d="M102.5,-321.354C102.5,-312.796 102.5,-303.578 102.5,-294.608"/>
+<polygon fill="none" stroke="midnightblue" points="99.0001,-321.404 102.5,-331.404 106,-321.404 99.0001,-321.404"/>
 </g>
 </g>
 </svg>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferRealize__coll__graph.svg b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize__coll__graph.svg
new file mode 100644
index 0000000..1102cc7
--- /dev/null
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize__coll__graph.svg
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.38.0 (20140413.2041)
+ -->
+<!-- Title: tvm::tir::BufferRealize Pages: 1 -->
+<svg width="172pt" height="287pt"
+ viewBox="0.00 0.00 172.00 287.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 283)">
+<title>tvm::tir::BufferRealize</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-283 168,-283 168,4 -4,4"/>
+<!-- Node1 -->
+<g id="node1" class="node"><title>Node1</title>
+<polygon fill="#bfbfbf" stroke="black" points="0,-0.5 0,-79.5 164,-79.5 164,-0.5 0,-0.5"/>
+<text text-anchor="middle" x="82" y="-67.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::BufferRealize</text>
+<polyline fill="none" stroke="black" points="0,-60.5 164,-60.5 "/>
+<text text-anchor="middle" x="82" y="-48.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="0,-41.5 164,-41.5 "/>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ BufferRealize()</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_NOTNULLABLE</text>
+<text text-anchor="start" x="8" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">_OBJECT_REF_METHODS()</text>
+</g>
+<!-- Node2 -->
+<g id="node2" class="node"><title>Node2</title>
+<g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1Stmt.html" target="_top" xlink:title="Container of all statements. ">
+<polygon fill="white" stroke="black" points="7,-116.5 7,-184.5 157,-184.5 157,-116.5 7,-116.5"/>
+<text text-anchor="middle" x="82" y="-172.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::Stmt</text>
+<polyline fill="none" stroke="black" points="7,-165.5 157,-165.5 "/>
+<text text-anchor="middle" x="82" y="-153.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="7,-146.5 157,-146.5 "/>
+<text text-anchor="start" x="15" y="-134.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_OBJECT_REF</text>
+<text text-anchor="start" x="15" y="-123.5" font-family="Helvetica,sans-Serif" font-size="10.00">_METHODS()</text>
+</a>
+</g>
+</g>
+<!-- Node2&#45;&gt;Node1 -->
+<g id="edge1" class="edge"><title>Node2&#45;&gt;Node1</title>
+<path fill="none" stroke="midnightblue" d="M82,-106.274C82,-97.4764 82,-88.2753 82,-79.6493"/>
+<polygon fill="none" stroke="midnightblue" points="78.5001,-106.357 82,-116.357 85.5001,-106.357 78.5001,-106.357"/>
+</g>
+<!-- Node3 -->
+<g id="node3" class="node"><title>Node3</title>
+<polygon fill="white" stroke="#bfbfbf" points="51,-221.5 51,-278.5 113,-278.5 113,-221.5 51,-221.5"/>
+<text text-anchor="middle" x="82" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00">ObjectRef</text>
+<polyline fill="none" stroke="#bfbfbf" points="51,-259.5 113,-259.5 "/>
+<text text-anchor="middle" x="82" y="-247.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="#bfbfbf" points="51,-240.5 113,-240.5 "/>
+<text text-anchor="middle" x="82" y="-228.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+</g>
+<!-- Node3&#45;&gt;Node2 -->
+<g id="edge2" class="edge"><title>Node3&#45;&gt;Node2</title>
+<path fill="none" stroke="midnightblue" d="M82,-211.063C82,-202.393 82,-193.243 82,-184.786"/>
+<polygon fill="none" stroke="midnightblue" points="78.5001,-211.321 82,-221.321 85.5001,-211.321 78.5001,-211.321"/>
+</g>
+</g>
+</svg>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferRealize__inherit__graph.svg b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize__inherit__graph.svg
new file mode 100644
index 0000000..1102cc7
--- /dev/null
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferRealize__inherit__graph.svg
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.38.0 (20140413.2041)
+ -->
+<!-- Title: tvm::tir::BufferRealize Pages: 1 -->
+<svg width="172pt" height="287pt"
+ viewBox="0.00 0.00 172.00 287.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 283)">
+<title>tvm::tir::BufferRealize</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-283 168,-283 168,4 -4,4"/>
+<!-- Node1 -->
+<g id="node1" class="node"><title>Node1</title>
+<polygon fill="#bfbfbf" stroke="black" points="0,-0.5 0,-79.5 164,-79.5 164,-0.5 0,-0.5"/>
+<text text-anchor="middle" x="82" y="-67.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::BufferRealize</text>
+<polyline fill="none" stroke="black" points="0,-60.5 164,-60.5 "/>
+<text text-anchor="middle" x="82" y="-48.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="0,-41.5 164,-41.5 "/>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ BufferRealize()</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_NOTNULLABLE</text>
+<text text-anchor="start" x="8" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">_OBJECT_REF_METHODS()</text>
+</g>
+<!-- Node2 -->
+<g id="node2" class="node"><title>Node2</title>
+<g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1Stmt.html" target="_top" xlink:title="Container of all statements. ">
+<polygon fill="white" stroke="black" points="7,-116.5 7,-184.5 157,-184.5 157,-116.5 7,-116.5"/>
+<text text-anchor="middle" x="82" y="-172.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::Stmt</text>
+<polyline fill="none" stroke="black" points="7,-165.5 157,-165.5 "/>
+<text text-anchor="middle" x="82" y="-153.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="7,-146.5 157,-146.5 "/>
+<text text-anchor="start" x="15" y="-134.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_OBJECT_REF</text>
+<text text-anchor="start" x="15" y="-123.5" font-family="Helvetica,sans-Serif" font-size="10.00">_METHODS()</text>
+</a>
+</g>
+</g>
+<!-- Node2&#45;&gt;Node1 -->
+<g id="edge1" class="edge"><title>Node2&#45;&gt;Node1</title>
+<path fill="none" stroke="midnightblue" d="M82,-106.274C82,-97.4764 82,-88.2753 82,-79.6493"/>
+<polygon fill="none" stroke="midnightblue" points="78.5001,-106.357 82,-116.357 85.5001,-106.357 78.5001,-106.357"/>
+</g>
+<!-- Node3 -->
+<g id="node3" class="node"><title>Node3</title>
+<polygon fill="white" stroke="#bfbfbf" points="51,-221.5 51,-278.5 113,-278.5 113,-221.5 51,-221.5"/>
+<text text-anchor="middle" x="82" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00">ObjectRef</text>
+<polyline fill="none" stroke="#bfbfbf" points="51,-259.5 113,-259.5 "/>
+<text text-anchor="middle" x="82" y="-247.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="#bfbfbf" points="51,-240.5 113,-240.5 "/>
+<text text-anchor="middle" x="82" y="-228.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+</g>
+<!-- Node3&#45;&gt;Node2 -->
+<g id="edge2" class="edge"><title>Node3&#45;&gt;Node2</title>
+<path fill="none" stroke="midnightblue" d="M82,-211.063C82,-202.393 82,-193.243 82,-184.786"/>
+<polygon fill="none" stroke="midnightblue" points="78.5001,-211.321 82,-221.321 85.5001,-211.321 78.5001,-211.321"/>
+</g>
+</g>
+</svg>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html b/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html
index 7288eba..f107d40 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html
@@ -95,6 +95,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 
+<p>Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" title="Store value to the high dimension buffer. ">BufferStoreNode</a>.  
+ <a href="classtvm_1_1tir_1_1BufferStore.html#details">More...</a></p>
+
 <p><code>#include &lt;<a class="el" href="stmt_8h_source.html">stmt.h</a>&gt;</code></p>
 <div class="dynheader">
 Inheritance diagram for tvm::tir::BufferStore:</div>
@@ -119,7 +122,10 @@ Public Member Functions</h2></td></tr>
 <tr class="memitem:aaca2f7bd4110028470b82825bcbb87ac inherit pub_methods_classtvm_1_1tir_1_1Stmt"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1Stmt.html#aaca2f7bd4110028470b82825bcbb87ac">TVM_DEFINE_OBJECT_REF_METHODS</a> (<a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>, ObjectRef, <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>)</td></tr>
 <tr class="separator:aaca2f7bd4110028470b82825bcbb87ac inherit pub_methods_classtvm_1_1tir_1_1Stmt"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
-<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" title="Store value to the high dimension buffer. ">BufferStoreNode</a>. </p>
+<dl class="section see"><dt>See also</dt><dd><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" title="Store value to the high dimension buffer. ">BufferStoreNode</a> </dd></dl>
+</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
 <a class="anchor" id="af7e9dbddd9b6f3a604067d805db61156"></a>
 <div class="memitem">
 <div class="memproto">
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferStoreNode.html b/docs/doxygen/classtvm_1_1tir_1_1BufferStoreNode.html
index 8d893b8..9db3a97 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1BufferStoreNode.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1BufferStoreNode.html
@@ -97,6 +97,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 
+<p>Store value to the high dimension buffer.  
+ <a href="classtvm_1_1tir_1_1BufferStoreNode.html#details">More...</a></p>
+
 <p><code>#include &lt;<a class="el" href="stmt_8h_source.html">stmt.h</a>&gt;</code></p>
 <div class="dynheader">
 Inheritance diagram for tvm::tir::BufferStoreNode:</div>
@@ -149,7 +152,10 @@ Static Public Attributes</h2></td></tr>
 <tr class="memitem:ab7e026e32383e67e620719b025e00056 inherit pub_static_attribs_classtvm_1_1tir_1_1StmtNode"><td class="memItemLeft" align="right" valign="top">static constexpr const bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#ab7e026e32383e67e620719b025e00056">_type_has_method_shash_reduce</a> = true</td></tr>
 <tr class="separator:ab7e026e32383e67e620719b025e00056 inherit pub_static_attribs_classtvm_1_1tir_1_1StmtNode"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
-<h2 class="groupheader">Member Function Documentation</h2>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Store value to the high dimension buffer. </p>
+<div class="fragment"><div class="line"><a class="code" href="classtvm_1_1tir_1_1BufferStoreNode.html#a09ecabba35b3bd119de952f49028274d">buffer</a>[i, j] = <a class="code" href="classtvm_1_1tir_1_1BufferStoreNode.html#a435d8c33bc38bc7bbc7d2ea5c50ec6aa">value</a>;</div></div><!-- fragment --> <dl class="section see"><dt>See also</dt><dd><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> </dd></dl>
+</div><h2 class="groupheader">Member Function Documentation</h2>
 <a class="anchor" id="a52bc65489d1bf6aeb14bc78f6f1bdf1f"></a>
 <div class="memitem">
 <div class="memproto">
diff --git a/docs/doxygen/classtvm_1_1tir_1_1CallNode.html b/docs/doxygen/classtvm_1_1tir_1_1CallNode.html
index 84d6195..dcc14c2 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1CallNode.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1CallNode.html
@@ -253,6 +253,7 @@ Static Public Attributes</h2></td></tr>
 </td></tr>
 <tr><td class="fieldname"><a class="anchor" id="a13e1f0f48f488fd085ca2684738fa97aa57993161c871dc15a052154594845514"></a>Halide&#160;</td><td class="fielddoc">
 <p>Halide-style call, evaluates func(args). </p>
+<dl class="section note"><dt>Note</dt><dd>Deprecated, move to <a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> in the future. </dd></dl>
 </td></tr>
 <tr><td class="fieldname"><a class="anchor" id="a13e1f0f48f488fd085ca2684738fa97aa53257cce64e712a51504dc56287f9bce"></a>Intrinsic&#160;</td><td class="fielddoc">
 <p>Intrinsic functions. </p>
@@ -642,6 +643,7 @@ Static Public Attributes</h2></td></tr>
 </div><div class="memdoc">
 
 <p>The function to be called. </p>
+<dl class="section note"><dt>Note</dt><dd>Deprecated, move to <a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> in the future. </dd></dl>
 
 </div>
 </div>
@@ -870,6 +872,7 @@ Static Public Attributes</h2></td></tr>
 </div><div class="memdoc">
 
 <p>The output value index if func's value is a tuple. </p>
+<dl class="section note"><dt>Note</dt><dd>Deprecated, move to <a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> in the future. </dd></dl>
 
 </div>
 </div>
diff --git a/docs/doxygen/dir_000019_000010.html b/docs/doxygen/classtvm_1_1tir_1_1Prefetch-members.html
similarity index 61%
copy from docs/doxygen/dir_000019_000010.html
copy to docs/doxygen/classtvm_1_1tir_1_1Prefetch-members.html
index 43c04e0..503576a 100644
--- a/docs/doxygen/dir_000019_000010.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1Prefetch-members.html
@@ -4,7 +4,7 @@
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
 <meta name="generator" content="Doxygen 1.8.11"/>
-<title>tvm: include/tvm/te -&gt; tir Relation</title>
+<title>tvm: Member List</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="jquery.js"></script>
 <script type="text/javascript" src="dynsections.js"></script>
@@ -39,7 +39,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
     <ul class="tablist">
       <li><a href="index.html"><span>Main&#160;Page</span></a></li>
       <li><a href="namespaces.html"><span>Namespaces</span></a></li>
-      <li><a href="annotated.html"><span>Classes</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Classes</span></a></li>
       <li><a href="files.html"><span>Files</span></a></li>
       <li>
         <div id="MSearchBox" class="MSearchBoxInactive">
@@ -59,6 +59,14 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
       </li>
     </ul>
   </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Class&#160;List</span></a></li>
+      <li><a href="classes.html"><span>Class&#160;Index</span></a></li>
+      <li><a href="inherits.html"><span>Class&#160;Hierarchy</span></a></li>
+      <li><a href="functions.html"><span>Class&#160;Members</span></a></li>
+    </ul>
+  </div>
 <!-- window showing the filter options -->
 <div id="MSearchSelectWindow"
      onmouseover="return searchBox.OnSearchSelectShow()"
@@ -75,11 +83,21 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
 <div id="nav-path" class="navpath">
   <ul>
-<li class="navelem"><a class="el" href="dir_d44c64559bbebec7f509842c48db8b23.html">include</a></li><li class="navelem"><a class="el" href="dir_b4c7d8e826c599ba55146c099a14beb5.html">tvm</a></li><li class="navelem"><a class="el" href="dir_f97d855a3173728370e632aa77170e34.html">te</a></li>  </ul>
+<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html">Prefetch</a></li>  </ul>
 </div>
 </div><!-- top -->
+<div class="header">
+  <div class="headertitle">
+<div class="title">tvm::tir::Prefetch Member List</div>  </div>
+</div><!--header-->
 <div class="contents">
-<h3>te &rarr; tir Relation</h3><table class="dirtab"><tr class="dirtab"><th class="dirtab">File in include/tvm/te</th><th class="dirtab">Includes file in include/tvm/tir</th></tr><tr class="dirtab"><td class="dirtab"><a class="el" href="autodiff_8h.html">autodiff.h</a></td><td class="dirtab"><a class="el" href="tir_2expr_8h.html">expr.h</a></td></tr><tr class="dirtab"><td class="dirtab"><a class="el" href="operation_8h.html">operation.h</a></td><td class="dirtab"><a class="el" href="buff [...]
+
+<p>This is the complete list of members for <a class="el" href="classtvm_1_1tir_1_1Prefetch.html">tvm::tir::Prefetch</a>, including all inherited members.</p>
+<table class="directory">
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html#ac9c6a46f77c134c77f496a9a8b4c7128">Prefetch</a>(Buffer buffer, Array&lt; Range &gt; bounds)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html">tvm::tir::Prefetch</a></td><td class="entry"><span class="mlabel">explicit</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html#ab50b989aeb05d3fd2b70de6267637ce4">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a>(Prefetch, Stmt, PrefetchNode)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html">tvm::tir::Prefetch</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Stmt.html#aaca2f7bd4110028470b82825bcbb87ac">TVM_DEFINE_OBJECT_REF_METHODS</a>(Stmt, ObjectRef, StmtNode)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">tvm::tir::Stmt</a></td><td class="entry"></td></tr>
+</table></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
 Generated by &#160;<a href="http://www.doxygen.org/index.html">
diff --git a/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html b/docs/doxygen/classtvm_1_1tir_1_1Prefetch.html
similarity index 74%
copy from docs/doxygen/classtvm_1_1tir_1_1BufferStore.html
copy to docs/doxygen/classtvm_1_1tir_1_1Prefetch.html
index 7288eba..6e9b5f8 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1BufferStore.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1Prefetch.html
@@ -4,7 +4,7 @@
 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
 <meta http-equiv="X-UA-Compatible" content="IE=9"/>
 <meta name="generator" content="Doxygen 1.8.11"/>
-<title>tvm: tvm::tir::BufferStore Class Reference</title>
+<title>tvm: tvm::tir::Prefetch Class Reference</title>
 <link href="tabs.css" rel="stylesheet" type="text/css"/>
 <script type="text/javascript" src="jquery.js"></script>
 <script type="text/javascript" src="dynsections.js"></script>
@@ -83,44 +83,50 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 
 <div id="nav-path" class="navpath">
   <ul>
-<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a></li>  </ul>
+<li class="navelem"><a class="el" href="namespacetvm.html">tvm</a></li><li class="navelem"><a class="el" href="namespacetvm_1_1tir.html">tir</a></li><li class="navelem"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html">Prefetch</a></li>  </ul>
 </div>
 </div><!-- top -->
 <div class="header">
   <div class="summary">
 <a href="#pub-methods">Public Member Functions</a> &#124;
-<a href="classtvm_1_1tir_1_1BufferStore-members.html">List of all members</a>  </div>
+<a href="classtvm_1_1tir_1_1Prefetch-members.html">List of all members</a>  </div>
   <div class="headertitle">
-<div class="title">tvm::tir::BufferStore Class Reference</div>  </div>
+<div class="title">tvm::tir::Prefetch Class Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
 
+<p>Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" title="A prefetch hint for a buffer. ">PrefetchNode</a>.  
+ <a href="classtvm_1_1tir_1_1Prefetch.html#details">More...</a></p>
+
 <p><code>#include &lt;<a class="el" href="stmt_8h_source.html">stmt.h</a>&gt;</code></p>
 <div class="dynheader">
-Inheritance diagram for tvm::tir::BufferStore:</div>
+Inheritance diagram for tvm::tir::Prefetch:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferStore__inherit__graph.svg" width="211" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1Prefetch__inherit__graph.svg" width="230" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <div class="dynheader">
-Collaboration diagram for tvm::tir::BufferStore:</div>
+Collaboration diagram for tvm::tir::Prefetch:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1BufferStore__coll__graph.svg" width="211" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1Prefetch__coll__graph.svg" width="230" height="383"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
 Public Member Functions</h2></td></tr>
-<tr class="memitem:af7e9dbddd9b6f3a604067d805db61156"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html#af7e9dbddd9b6f3a604067d805db61156">BufferStore</a> (<a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> buffer, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> value, <a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_ [...]
-<tr class="separator:af7e9dbddd9b6f3a604067d805db61156"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ad1941ecd1ad16134af970e7e845a676b"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html#ad1941ecd1ad16134af970e7e845a676b">TVM_DEFINE_OBJECT_REF_METHODS</a> (<a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a>, <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>, <a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNod [...]
-<tr class="separator:ad1941ecd1ad16134af970e7e845a676b"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ac9c6a46f77c134c77f496a9a8b4c7128"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html#ac9c6a46f77c134c77f496a9a8b4c7128">Prefetch</a> (<a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> buffer, <a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt; bounds)</td></tr>
+<tr class="separator:ac9c6a46f77c134c77f496a9a8b4c7128"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ab50b989aeb05d3fd2b70de6267637ce4"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html#ab50b989aeb05d3fd2b70de6267637ce4">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a> (<a class="el" href="classtvm_1_1tir_1_1Prefetch.html">Prefetch</a>, <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>, <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">PrefetchNode</ [...]
+<tr class="separator:ab50b989aeb05d3fd2b70de6267637ce4"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pub_methods_classtvm_1_1tir_1_1Stmt"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_classtvm_1_1tir_1_1Stmt')"><img src="closed.png" alt="-"/>&#160;Public Member Functions inherited from <a class="el" href="classtvm_1_1tir_1_1Stmt.html">tvm::tir::Stmt</a></td></tr>
 <tr class="memitem:aaca2f7bd4110028470b82825bcbb87ac inherit pub_methods_classtvm_1_1tir_1_1Stmt"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1Stmt.html#aaca2f7bd4110028470b82825bcbb87ac">TVM_DEFINE_OBJECT_REF_METHODS</a> (<a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>, ObjectRef, <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>)</td></tr>
 <tr class="separator:aaca2f7bd4110028470b82825bcbb87ac inherit pub_methods_classtvm_1_1tir_1_1Stmt"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
-<h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
-<a class="anchor" id="af7e9dbddd9b6f3a604067d805db61156"></a>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" title="A prefetch hint for a buffer. ">PrefetchNode</a>. </p>
+<dl class="section see"><dt>See also</dt><dd><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" title="A prefetch hint for a buffer. ">PrefetchNode</a> </dd></dl>
+</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
+<a class="anchor" id="ac9c6a46f77c134c77f496a9a8b4c7128"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -128,7 +134,7 @@ Public Member Functions</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">tvm::tir::BufferStore::BufferStore </td>
+          <td class="memname">tvm::tir::Prefetch::Prefetch </td>
           <td>(</td>
           <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>&#160;</td>
           <td class="paramname"><em>buffer</em>, </td>
@@ -136,14 +142,8 @@ Public Member Functions</h2></td></tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
-          <td class="paramname"><em>value</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &gt;&#160;</td>
-          <td class="paramname"><em>indices</em>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt;&#160;</td>
+          <td class="paramname"><em>bounds</em>&#160;</td>
         </tr>
         <tr>
           <td></td>
@@ -161,14 +161,14 @@ Public Member Functions</h2></td></tr>
 </div>
 </div>
 <h2 class="groupheader">Member Function Documentation</h2>
-<a class="anchor" id="ad1941ecd1ad16134af970e7e845a676b"></a>
+<a class="anchor" id="ab50b989aeb05d3fd2b70de6267637ce4"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">tvm::tir::BufferStore::TVM_DEFINE_OBJECT_REF_METHODS </td>
+          <td class="memname">tvm::tir::Prefetch::TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS </td>
           <td>(</td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html">Prefetch</a>&#160;</td>
           <td class="paramname">, </td>
         </tr>
         <tr>
@@ -180,7 +180,7 @@ Public Member Functions</h2></td></tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNode</a>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">PrefetchNode</a>&#160;</td>
           <td class="paramname">&#160;</td>
         </tr>
         <tr>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode-members.html b/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode-members.html
index 7e66088..b1276ef 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode-members.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode-members.html
@@ -97,16 +97,15 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#a463ce5a124a6c222706888122bb44865">_type_has_method_sequal_reduce</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#ab7e026e32383e67e620719b025e00056">_type_has_method_shash_reduce</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a4b0e453459435cdd5dced95c419f95f8">_type_key</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a9ce8a82f03dd23e200ec6c611fddeb9c">bounds</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#aaeccff8519185fc44caa07005ef56af3">dtype</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a59428251589e635271007c8b92a051e0">func</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a106bdc39611c6e46c78b7d6920c5c51d">make</a>(FunctionRef func, int value_index, DataType dtype, Region bounds)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#ac673cc61f7ead139d993a974db4e2cee">bounds</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#aeb3955b22bd5cf2675adc9bd95d69ca4">buffer</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#acaaa5e89462c7edf3019df4283ec74db">PrefetchNode</a>()=default</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a12d0e137a00b37d439d7a10677023c62">PrefetchNode</a>(Buffer buffer, Array&lt; Range &gt; bounds)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a40defcee76278429e8c6c3c981206d36">SEqualReduce</a>(const PrefetchNode *other, SEqualReducer equal) const </td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#ad560931be434382ad0be20bb6bb7b827">SHashReduce</a>(SHashReducer hash_reduce) const </td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#ab4627fb1adbea88d4fe3a33679569421">TVM_DECLARE_BASE_OBJECT_INFO</a>(StmtNode, Object)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td><td class="entry"></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a2ad09dce7433dbd22eb1432a3e77f474">TVM_DECLARE_FINAL_OBJECT_INFO</a>(PrefetchNode, StmtNode)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a0c2bdc2deb363f41b0362c6a9ed4296f">value_index</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a491c03fa62a96b66b7ea0c8e9095b98e">VisitAttrs</a>(AttrVisitor *v)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a491c03fa62a96b66b7ea0c8e9095b98e">VisitAttrs</a>(AttrVisitor *v)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">tvm::tir::PrefetchNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
 </table></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode.html b/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode.html
index 112de1a..1cb3d2e 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode.html
+++ b/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode.html
@@ -89,7 +89,6 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="header">
   <div class="summary">
 <a href="#pub-methods">Public Member Functions</a> &#124;
-<a href="#pub-static-methods">Static Public Member Functions</a> &#124;
 <a href="#pub-attribs">Public Attributes</a> &#124;
 <a href="#pub-static-attribs">Static Public Attributes</a> &#124;
 <a href="classtvm_1_1tir_1_1PrefetchNode-members.html">List of all members</a>  </div>
@@ -98,20 +97,20 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 
-<p>A prefetch hint of func.  
+<p>A prefetch hint for a buffer.  
  <a href="classtvm_1_1tir_1_1PrefetchNode.html#details">More...</a></p>
 
 <p><code>#include &lt;<a class="el" href="stmt_8h_source.html">stmt.h</a>&gt;</code></p>
 <div class="dynheader">
 Inheritance diagram for tvm::tir::PrefetchNode:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1PrefetchNode__inherit__graph.svg" width="284" height="515"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1PrefetchNode__inherit__graph.svg" width="284" height="500"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <div class="dynheader">
 Collaboration diagram for tvm::tir::PrefetchNode:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1PrefetchNode__coll__graph.svg" width="887" height="663"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1PrefetchNode__coll__graph.svg" width="672" height="575"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
@@ -123,31 +122,24 @@ Public Member Functions</h2></td></tr>
 <tr class="separator:a40defcee76278429e8c6c3c981206d36"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ad560931be434382ad0be20bb6bb7b827"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#ad560931be434382ad0be20bb6bb7b827">SHashReduce</a> (<a class="el" href="classtvm_1_1SHashReducer.html">SHashReducer</a> hash_reduce) const </td></tr>
 <tr class="separator:ad560931be434382ad0be20bb6bb7b827"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:acaaa5e89462c7edf3019df4283ec74db"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#acaaa5e89462c7edf3019df4283ec74db">PrefetchNode</a> ()=default</td></tr>
+<tr class="separator:acaaa5e89462c7edf3019df4283ec74db"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a12d0e137a00b37d439d7a10677023c62"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a12d0e137a00b37d439d7a10677023c62">PrefetchNode</a> (<a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#aeb3955b22bd5cf2675adc9bd95d69ca4">buffer</a>, <a class="el" href="classtvm_1_1Array.html">Array</a>&lt;  [...]
+<tr class="separator:a12d0e137a00b37d439d7a10677023c62"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a2ad09dce7433dbd22eb1432a3e77f474"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a2ad09dce7433dbd22eb1432a3e77f474">TVM_DECLARE_FINAL_OBJECT_INFO</a> (<a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">PrefetchNode</a>, <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>)</td></tr>
 <tr class="separator:a2ad09dce7433dbd22eb1432a3e77f474"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pub_methods_classtvm_1_1tir_1_1StmtNode"><td colspan="2" onclick="javascript:toggleInherit('pub_methods_classtvm_1_1tir_1_1StmtNode')"><img src="closed.png" alt="-"/>&#160;Public Member Functions inherited from <a class="el" href="classtvm_1_1tir_1_1StmtNode.html">tvm::tir::StmtNode</a></td></tr>
 <tr class="memitem:ab4627fb1adbea88d4fe3a33679569421 inherit pub_methods_classtvm_1_1tir_1_1StmtNode"><td class="memItemLeft" align="right" valign="top">&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1StmtNode.html#ab4627fb1adbea88d4fe3a33679569421">TVM_DECLARE_BASE_OBJECT_INFO</a> (<a class="el" href="classtvm_1_1tir_1_1StmtNode.html">StmtNode</a>, Object)</td></tr>
 <tr class="separator:ab4627fb1adbea88d4fe3a33679569421 inherit pub_methods_classtvm_1_1tir_1_1StmtNode"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
-<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-methods"></a>
-Static Public Member Functions</h2></td></tr>
-<tr class="memitem:a106bdc39611c6e46c78b7d6920c5c51d"><td class="memItemLeft" align="right" valign="top">static <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a106bdc39611c6e46c78b7d6920c5c51d">make</a> (<a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a> <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a59428251589e635271007c8b92a051e0">fun [...]
-<tr class="separator:a106bdc39611c6e46c78b7d6920c5c51d"><td class="memSeparator" colspan="2">&#160;</td></tr>
-</table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
 Public Attributes</h2></td></tr>
-<tr class="memitem:a59428251589e635271007c8b92a051e0"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a59428251589e635271007c8b92a051e0">func</a></td></tr>
-<tr class="memdesc:a59428251589e635271007c8b92a051e0"><td class="mdescLeft">&#160;</td><td class="mdescRight">The function to be prefetched.  <a href="#a59428251589e635271007c8b92a051e0">More...</a><br /></td></tr>
-<tr class="separator:a59428251589e635271007c8b92a051e0"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a0c2bdc2deb363f41b0362c6a9ed4296f"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a0c2bdc2deb363f41b0362c6a9ed4296f">value_index</a></td></tr>
-<tr class="memdesc:a0c2bdc2deb363f41b0362c6a9ed4296f"><td class="mdescLeft">&#160;</td><td class="mdescRight">The output value index if func's value is a tuple.  <a href="#a0c2bdc2deb363f41b0362c6a9ed4296f">More...</a><br /></td></tr>
-<tr class="separator:a0c2bdc2deb363f41b0362c6a9ed4296f"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:aaeccff8519185fc44caa07005ef56af3"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">DataType</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#aaeccff8519185fc44caa07005ef56af3">dtype</a></td></tr>
-<tr class="memdesc:aaeccff8519185fc44caa07005ef56af3"><td class="mdescLeft">&#160;</td><td class="mdescRight">The data type of the array.  <a href="#aaeccff8519185fc44caa07005ef56af3">More...</a><br /></td></tr>
-<tr class="separator:aaeccff8519185fc44caa07005ef56af3"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a9ce8a82f03dd23e200ec6c611fddeb9c"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacetvm_1_1tir.html#a8277e2a3d81a80a4776705673df51e0a">Region</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#a9ce8a82f03dd23e200ec6c611fddeb9c">bounds</a></td></tr>
-<tr class="memdesc:a9ce8a82f03dd23e200ec6c611fddeb9c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Bounds to be prefetched.  <a href="#a9ce8a82f03dd23e200ec6c611fddeb9c">More...</a><br /></td></tr>
-<tr class="separator:a9ce8a82f03dd23e200ec6c611fddeb9c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aeb3955b22bd5cf2675adc9bd95d69ca4"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#aeb3955b22bd5cf2675adc9bd95d69ca4">buffer</a></td></tr>
+<tr class="memdesc:aeb3955b22bd5cf2675adc9bd95d69ca4"><td class="mdescLeft">&#160;</td><td class="mdescRight">The buffer to be prefetched.  <a href="#aeb3955b22bd5cf2675adc9bd95d69ca4">More...</a><br /></td></tr>
+<tr class="separator:aeb3955b22bd5cf2675adc9bd95d69ca4"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ac673cc61f7ead139d993a974db4e2cee"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html#ac673cc61f7ead139d993a974db4e2cee">bounds</a></td></tr>
+<tr class="memdesc:ac673cc61f7ead139d993a974db4e2cee"><td class="mdescLeft">&#160;</td><td class="mdescRight">Bounds to be prefetched.  <a href="#ac673cc61f7ead139d993a974db4e2cee">More...</a><br /></td></tr>
+<tr class="separator:ac673cc61f7ead139d993a974db4e2cee"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-attribs"></a>
 Static Public Attributes</h2></td></tr>
@@ -162,9 +154,9 @@ Static Public Attributes</h2></td></tr>
 <tr class="separator:ab7e026e32383e67e620719b025e00056 inherit pub_static_attribs_classtvm_1_1tir_1_1StmtNode"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
-<div class="textblock"><p>A prefetch hint of func. </p>
-</div><h2 class="groupheader">Member Function Documentation</h2>
-<a class="anchor" id="a106bdc39611c6e46c78b7d6920c5c51d"></a>
+<div class="textblock"><p>A prefetch hint for a buffer. </p>
+</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
+<a class="anchor" id="acaaa5e89462c7edf3019df4283ec74db"></a>
 <div class="memitem">
 <div class="memproto">
 <table class="mlabels">
@@ -172,27 +164,38 @@ Static Public Attributes</h2></td></tr>
   <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname">static <a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a> tvm::tir::PrefetchNode::make </td>
+          <td class="memname">tvm::tir::PrefetchNode::PrefetchNode </td>
           <td>(</td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a>&#160;</td>
-          <td class="paramname"><em>func</em>, </td>
-        </tr>
-        <tr>
-          <td class="paramkey"></td>
+          <td class="paramname"></td><td>)</td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
-          <td class="paramname"><em>value_index</em>, </td>
         </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">default</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+</div>
+</div>
+<a class="anchor" id="a12d0e137a00b37d439d7a10677023c62"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
         <tr>
-          <td class="paramkey"></td>
-          <td></td>
-          <td class="paramtype"><a class="el" href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">DataType</a>&#160;</td>
-          <td class="paramname"><em>dtype</em>, </td>
+          <td class="memname">tvm::tir::PrefetchNode::PrefetchNode </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>&#160;</td>
+          <td class="paramname"><em>buffer</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="namespacetvm_1_1tir.html#a8277e2a3d81a80a4776705673df51e0a">Region</a>&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt; <a class="el" href="classtvm_1_1Range.html">Range</a> &gt;&#160;</td>
           <td class="paramname"><em>bounds</em>&#160;</td>
         </tr>
         <tr>
@@ -203,13 +206,14 @@ Static Public Attributes</h2></td></tr>
       </table>
   </td>
   <td class="mlabels-right">
-<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
   </tr>
 </table>
 </div><div class="memdoc">
 
 </div>
 </div>
+<h2 class="groupheader">Member Function Documentation</h2>
 <a class="anchor" id="a40defcee76278429e8c6c3c981206d36"></a>
 <div class="memitem">
 <div class="memproto">
@@ -339,12 +343,12 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="a9ce8a82f03dd23e200ec6c611fddeb9c"></a>
+<a class="anchor" id="ac673cc61f7ead139d993a974db4e2cee"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="namespacetvm_1_1tir.html#a8277e2a3d81a80a4776705673df51e0a">Region</a> tvm::tir::PrefetchNode::bounds</td>
+          <td class="memname"><a class="el" href="classtvm_1_1Array.html">Array</a>&lt;<a class="el" href="classtvm_1_1Range.html">Range</a>&gt; tvm::tir::PrefetchNode::bounds</td>
         </tr>
       </table>
 </div><div class="memdoc">
@@ -353,26 +357,12 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="aaeccff8519185fc44caa07005ef56af3"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname"><a class="el" href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">DataType</a> tvm::tir::PrefetchNode::dtype</td>
-        </tr>
-      </table>
-</div><div class="memdoc">
-
-<p>The data type of the array. </p>
-
-</div>
-</div>
-<a class="anchor" id="a59428251589e635271007c8b92a051e0"></a>
+<a class="anchor" id="aeb3955b22bd5cf2675adc9bd95d69ca4"></a>
 <div class="memitem">
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1tir_1_1FunctionRef.html">FunctionRef</a> tvm::tir::PrefetchNode::func</td>
+          <td class="memname"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> tvm::tir::PrefetchNode::buffer</td>
         </tr>
       </table>
 </div><div class="memdoc">
@@ -381,20 +371,6 @@ Static Public Attributes</h2></td></tr>
 
 </div>
 </div>
-<a class="anchor" id="a0c2bdc2deb363f41b0362c6a9ed4296f"></a>
-<div class="memitem">
-<div class="memproto">
-      <table class="memname">
-        <tr>
-          <td class="memname">int tvm::tir::PrefetchNode::value_index</td>
-        </tr>
-      </table>
-</div><div class="memdoc">
-
-<p>The output value index if func's value is a tuple. </p>
-
-</div>
-</div>
 <hr/>The documentation for this class was generated from the following file:<ul>
 <li>include/tvm/tir/<a class="el" href="stmt_8h_source.html">stmt.h</a></li>
 </ul>
diff --git a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode__coll__graph.svg b/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode__coll__graph.svg
index bffcce0..b2788b8 100644
--- a/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode__coll__graph.svg
+++ b/docs/doxygen/classtvm_1_1tir_1_1PrefetchNode__coll__graph.svg
@@ -4,159 +4,131 @@
 <!-- Generated by graphviz version 2.38.0 (20140413.2041)
  -->
 <!-- Title: tvm::tir::PrefetchNode Pages: 1 -->
-<svg width="665pt" height="497pt"
- viewBox="0.00 0.00 664.50 497.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 493)">
+<svg width="504pt" height="431pt"
+ viewBox="0.00 0.00 503.50 431.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 427)">
 <title>tvm::tir::PrefetchNode</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-493 660.5,-493 660.5,4 -4,4"/>
+<polygon fill="white" stroke="none" points="-4,4 -4,-427 499.5,-427 499.5,4 -4,4"/>
 <!-- Node1 -->
 <g id="node1" class="node"><title>Node1</title>
-<polygon fill="#bfbfbf" stroke="black" points="270.5,-0.5 270.5,-112.5 475.5,-112.5 475.5,-0.5 270.5,-0.5"/>
-<text text-anchor="middle" x="373" y="-100.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::PrefetchNode</text>
-<polyline fill="none" stroke="black" points="270.5,-93.5 475.5,-93.5 "/>
-<text text-anchor="start" x="278.5" y="-81.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ value_index</text>
-<text text-anchor="start" x="278.5" y="-70.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
-<polyline fill="none" stroke="black" points="270.5,-63.5 475.5,-63.5 "/>
-<text text-anchor="start" x="278.5" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ VisitAttrs()</text>
-<text text-anchor="start" x="278.5" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SEqualReduce()</text>
-<text text-anchor="start" x="278.5" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SHashReduce()</text>
-<text text-anchor="start" x="278.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_FINAL_OBJECT_INFO()</text>
-<text text-anchor="start" x="278.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ make()</text>
+<polygon fill="#bfbfbf" stroke="black" points="191.5,-0.5 191.5,-112.5 396.5,-112.5 396.5,-0.5 191.5,-0.5"/>
+<text text-anchor="middle" x="294" y="-100.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::PrefetchNode</text>
+<polyline fill="none" stroke="black" points="191.5,-93.5 396.5,-93.5 "/>
+<text text-anchor="start" x="199.5" y="-81.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
+<polyline fill="none" stroke="black" points="191.5,-74.5 396.5,-74.5 "/>
+<text text-anchor="start" x="199.5" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ VisitAttrs()</text>
+<text text-anchor="start" x="199.5" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SEqualReduce()</text>
+<text text-anchor="start" x="199.5" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ SHashReduce()</text>
+<text text-anchor="start" x="199.5" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ PrefetchNode()</text>
+<text text-anchor="start" x="199.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ PrefetchNode()</text>
+<text text-anchor="start" x="199.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_FINAL_OBJECT_INFO()</text>
 </g>
 <!-- Node2 -->
 <g id="node2" class="node"><title>Node2</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1StmtNode.html" target="_top" xlink:title="Base node of all statements. ">
-<polygon fill="white" stroke="black" points="0,-226.5 0,-327.5 204,-327.5 204,-226.5 0,-226.5"/>
-<text text-anchor="middle" x="102" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::StmtNode</text>
-<polyline fill="none" stroke="black" points="0,-308.5 204,-308.5 "/>
-<text text-anchor="start" x="8" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
-<text text-anchor="start" x="8" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_sequal</text>
-<text text-anchor="start" x="8" y="-274.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
-<text text-anchor="start" x="8" y="-263.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_shash</text>
-<text text-anchor="start" x="8" y="-252.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
-<polyline fill="none" stroke="black" points="0,-245.5 204,-245.5 "/>
-<text text-anchor="start" x="8" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_BASE_OBJECT_INFO()</text>
+<polygon fill="white" stroke="black" points="0,-193.5 0,-294.5 204,-294.5 204,-193.5 0,-193.5"/>
+<text text-anchor="middle" x="102" y="-282.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::StmtNode</text>
+<polyline fill="none" stroke="black" points="0,-275.5 204,-275.5 "/>
+<text text-anchor="start" x="8" y="-263.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_key</text>
+<text text-anchor="start" x="8" y="-252.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_sequal</text>
+<text text-anchor="start" x="8" y="-241.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
+<text text-anchor="start" x="8" y="-230.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ _type_has_method_shash</text>
+<text text-anchor="start" x="8" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00">_reduce</text>
+<polyline fill="none" stroke="black" points="0,-212.5 204,-212.5 "/>
+<text text-anchor="start" x="8" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DECLARE_BASE_OBJECT_INFO()</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node1 -->
 <g id="edge1" class="edge"><title>Node2&#45;&gt;Node1</title>
-<path fill="none" stroke="midnightblue" d="M151.579,-218.86C169.938,-199.182 191.53,-177.669 213,-160 233.25,-143.335 256.354,-127.135 278.585,-112.719"/>
-<polygon fill="none" stroke="midnightblue" points="148.921,-216.579 144.708,-226.3 154.064,-221.328 148.921,-216.579"/>
+<path fill="none" stroke="midnightblue" d="M160.837,-186.155C185.045,-162.767 212.981,-135.777 236.995,-112.576"/>
+<polygon fill="none" stroke="midnightblue" points="158.212,-183.824 153.452,-193.289 163.076,-188.858 158.212,-183.824"/>
 </g>
 <!-- Node3 -->
 <g id="node3" class="node"><title>Node3</title>
-<polygon fill="white" stroke="#bfbfbf" points="78.5,-431.5 78.5,-488.5 125.5,-488.5 125.5,-431.5 78.5,-431.5"/>
-<text text-anchor="middle" x="102" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00">Object</text>
-<polyline fill="none" stroke="#bfbfbf" points="78.5,-469.5 125.5,-469.5 "/>
-<text text-anchor="middle" x="102" y="-457.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
-<polyline fill="none" stroke="#bfbfbf" points="78.5,-450.5 125.5,-450.5 "/>
-<text text-anchor="middle" x="102" y="-438.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polygon fill="white" stroke="#bfbfbf" points="78.5,-365.5 78.5,-422.5 125.5,-422.5 125.5,-365.5 78.5,-365.5"/>
+<text text-anchor="middle" x="102" y="-410.5" font-family="Helvetica,sans-Serif" font-size="10.00">Object</text>
+<polyline fill="none" stroke="#bfbfbf" points="78.5,-403.5 125.5,-403.5 "/>
+<text text-anchor="middle" x="102" y="-391.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="#bfbfbf" points="78.5,-384.5 125.5,-384.5 "/>
+<text text-anchor="middle" x="102" y="-372.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
 </g>
 <!-- Node3&#45;&gt;Node2 -->
 <g id="edge2" class="edge"><title>Node3&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M102,-420.876C102,-393.799 102,-357.183 102,-327.799"/>
-<polygon fill="none" stroke="midnightblue" points="98.5001,-421.115 102,-431.115 105.5,-421.115 98.5001,-421.115"/>
+<path fill="none" stroke="midnightblue" d="M102,-355.105C102,-336.693 102,-314.331 102,-294.583"/>
+<polygon fill="none" stroke="midnightblue" points="98.5001,-355.162 102,-365.162 105.5,-355.162 98.5001,-355.162"/>
 </g>
 <!-- Node4 -->
 <g id="node4" class="node"><title>Node4</title>
-<g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1FunctionRef.html" target="_top" xlink:title="reference to a function ">
-<polygon fill="white" stroke="black" points="222,-243 222,-311 372,-311 372,-243 222,-243"/>
-<text text-anchor="middle" x="297" y="-299" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::tir::FunctionRef</text>
-<polyline fill="none" stroke="black" points="222,-292 372,-292 "/>
-<text text-anchor="middle" x="297" y="-280" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
-<polyline fill="none" stroke="black" points="222,-273 372,-273 "/>
-<text text-anchor="start" x="230" y="-261" font-family="Helvetica,sans-Serif" font-size="10.00">+ TVM_DEFINE_OBJECT_REF</text>
-<text text-anchor="start" x="230" y="-250" font-family="Helvetica,sans-Serif" font-size="10.00">_METHODS()</text>
+<g id="a_node4"><a xlink:href="classtvm_1_1Array.html" target="_top" xlink:title="{tvm::Array\&lt; tvm::Range \&gt;\n||+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ operator=()\l+ operator=()\land 13 more...\l}">
+<polygon fill="white" stroke="black" points="222,-160.5 222,-327.5 366,-327.5 366,-160.5 222,-160.5"/>
+<text text-anchor="middle" x="294" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::Array&lt; tvm::Range &gt;</text>
+<polyline fill="none" stroke="black" points="222,-308.5 366,-308.5 "/>
+<text text-anchor="middle" x="294" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="black" points="222,-289.5 366,-289.5 "/>
+<text text-anchor="start" x="230" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-211.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
+<text text-anchor="start" x="230" y="-189.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ operator=()</text>
+<text text-anchor="start" x="230" y="-178.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ operator=()</text>
+<text text-anchor="start" x="230" y="-167.5" font-family="Helvetica,sans-Serif" font-size="10.00">and 13 more...</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node1 -->
 <g id="edge3" class="edge"><title>Node4&#45;&gt;Node1</title>
-<path fill="none" stroke="#404040" d="M308.543,-242.815C319.465,-211.412 336.177,-163.365 349.847,-124.064"/>
-<polygon fill="none" stroke="#404040" points="349.856,-124.039 348.049,-117.058 353.798,-112.705 355.605,-119.686 349.856,-124.039"/>
-<text text-anchor="middle" x="363" y="-134" font-family="Helvetica,sans-Serif" font-size="10.00"> +func</text>
+<path fill="none" stroke="#404040" d="M294,-160.286C294,-148.438 294,-136.368 294,-124.862"/>
+<polygon fill="none" stroke="#404040" points="294,-124.637 290,-118.637 294,-112.637 298,-118.637 294,-124.637"/>
+<text text-anchor="middle" x="317.5" y="-134" font-family="Helvetica,sans-Serif" font-size="10.00"> +bounds</text>
 </g>
 <!-- Node5 -->
 <g id="node5" class="node"><title>Node5</title>
-<polygon fill="white" stroke="#bfbfbf" points="342,-431.5 342,-488.5 404,-488.5 404,-431.5 342,-431.5"/>
-<text text-anchor="middle" x="373" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00">ObjectRef</text>
-<polyline fill="none" stroke="#bfbfbf" points="342,-469.5 404,-469.5 "/>
-<text text-anchor="middle" x="373" y="-457.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
-<polyline fill="none" stroke="#bfbfbf" points="342,-450.5 404,-450.5 "/>
-<text text-anchor="middle" x="373" y="-438.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polygon fill="white" stroke="#bfbfbf" points="335,-365.5 335,-422.5 397,-422.5 397,-365.5 335,-365.5"/>
+<text text-anchor="middle" x="366" y="-410.5" font-family="Helvetica,sans-Serif" font-size="10.00">ObjectRef</text>
+<polyline fill="none" stroke="#bfbfbf" points="335,-403.5 397,-403.5 "/>
+<text text-anchor="middle" x="366" y="-391.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
+<polyline fill="none" stroke="#bfbfbf" points="335,-384.5 397,-384.5 "/>
+<text text-anchor="middle" x="366" y="-372.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
 </g>
 <!-- Node5&#45;&gt;Node4 -->
 <g id="edge4" class="edge"><title>Node5&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M357.256,-421.505C343.596,-388.972 324.01,-342.327 310.915,-311.141"/>
-<polygon fill="none" stroke="midnightblue" points="354.193,-423.25 361.292,-431.115 360.647,-420.54 354.193,-423.25"/>
+<path fill="none" stroke="midnightblue" d="M348.026,-356.053C343.762,-347.288 339.043,-337.588 334.222,-327.678"/>
+<polygon fill="none" stroke="midnightblue" points="344.935,-357.701 352.457,-365.162 351.23,-354.639 344.935,-357.701"/>
 </g>
 <!-- Node6 -->
 <g id="node6" class="node"><title>Node6</title>
-<g id="a_node6"><a xlink:href="classtvm_1_1Array.html" target="_top" xlink:title="{tvm::Array\&lt; Range \&gt;\n||+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ operator=()\l+ operator=()\land 13 more...\l}">
-<polygon fill="white" stroke="black" points="390.5,-193.5 390.5,-360.5 509.5,-360.5 509.5,-193.5 390.5,-193.5"/>
-<text text-anchor="middle" x="450" y="-348.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm::Array&lt; Range &gt;</text>
-<polyline fill="none" stroke="black" points="390.5,-341.5 509.5,-341.5 "/>
-<text text-anchor="middle" x="450" y="-329.5" font-family="Helvetica,sans-Serif" font-size="10.00"> </text>
-<polyline fill="none" stroke="black" points="390.5,-322.5 509.5,-322.5 "/>
-<text text-anchor="start" x="398.5" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
-<text text-anchor="start" x="398.5" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
-<text text-anchor="start" x="398.5" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
-<text text-anchor="start" x="398.5" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
-<text text-anchor="start" x="398.5" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</text>
-<text text-anchor="start" x="398.5" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ Array()</t