kylin-user mailing list archives

From Billy Liu <billy...@apache.org>
Subject Re: Kylin 1.6 cluster: newly created cubes sometimes do not sync to the other nodes
Date Thu, 14 Dec 2017 15:39:15 GMT
Hello Chenping,

Quite a few metadata-sync-related issues have been resolved recently, including:
https://issues.apache.org/jira/browse/KYLIN-2834
https://issues.apache.org/jira/browse/KYLIN-2858

We strongly suggest upgrading to the latest Kylin 2.2.
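
If you cannot upgrade right away, one possible stopgap (a sketch only, not a guaranteed fix) is to ask a query node to reload its metadata caches through Kylin's REST cache endpoint (PUT /kylin/api/cache/{type}/{name}/{action}). The hostname below is taken from your kylin.rest.servers list, and ADMIN/KYLIN is the default login of the "testing" security profile; adjust both to your environment:

    # refresh all cached metadata on one query node (example host/credentials)
    curl -X PUT --user ADMIN:KYLIN \
        http://prod-slave4:7070/kylin/api/cache/all/all/update

This only refreshes the node you call, so run it against each query node; the real fixes for the broadcast failures are in the JIRAs above.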

2017-12-12 18:12 GMT+08:00 chenping_cd@keruyun.com <chenping_cd@keruyun.com>:

> Hi all, I have recently run into a problem with Kylin 1.6 on CDH 5.8.4.
> The configuration of the managing Kylin node (server mode "all") is as follows:
> ### SERVICE ###
>
> # Kylin server mode, valid value [all, query, job]
> kyin.server.mode=all
>
> # Optional information for the owner of kylin platform, it can be your team's email
> # Currently it will be attached to each kylin's htable attribute
> kylin.owner=whoami@kylin.apache.org
>
> # List of web servers in use, this enables one web server instance to sync up with other servers.
> #kylin.rest.servers=localhost:7070
> kylin.rest.servers=prod-slave5:7070,prod-slave4:7070,prod-slave3:7070
>
> # Display timezone on UI,format like[GMT+N or GMT-N]
> kylin.rest.timezone=GMT+8
>
> ### SOURCE ###
>
> # Hive client, valid value [cli, beeline]
> kylin.hive.client=cli
>
> # Parameters for beeline client, only necessary if hive client is beeline
> #kylin.hive.beeline.params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u 'jdbc:hive2://localhost:10000'
>
> kylin.hive.keep.flat.table=false
>
> ### STORAGE ###
>
> # The metadata store in hbase
> kylin.metadata.url=kylin_metadata@hbase
>
> # The storage for final cube file in hbase
> kylin.storage.url=hbase
>
> # Working folder in HDFS, make sure user has the right access to the hdfs directory
> kylin.hdfs.working.dir=/kylin
>
> # Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4]
> kylin.hbase.default.compression.codec=none
>
> # HBase Cluster FileSystem, which serving hbase, format as hdfs://hbase-cluster:8020
> # Leave empty if hbase running on same cluster with hive and mapreduce
> #kylin.hbase.cluster.fs=
>
> # The cut size for hbase region, in GB.
> kylin.hbase.region.cut=5
>
> # The hfile size of GB, smaller hfile leading to the converting hfile MR has more reducers and be faster.
> # Set 0 to disable this optimization.
> kylin.hbase.hfile.size.gb=2
>
> kylin.hbase.region.count.min=1
> kylin.hbase.region.count.max=500
>
> ### JOB ###
>
> # max job retry on error, default 0: no retry
> kylin.job.retry=0
>
> # If true, job engine will not assume that hadoop CLI reside on the same server as it self
> # you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password
> # It should not be set to "true" unless you're NOT running Kylin.sh on a hadoop client machine
> # (Thus kylin instance has to ssh to another real hadoop client machine to execute hbase,hive,hadoop commands)
> kylin.job.run.as.remote.cmd=false
>
> # Only necessary when kylin.job.run.as.remote.cmd=true
> kylin.job.remote.cli.hostname=
> kylin.job.remote.cli.port=22
>
> # Only necessary when kylin.job.run.as.remote.cmd=true
> kylin.job.remote.cli.username=
>
> # Only necessary when kylin.job.run.as.remote.cmd=true
> kylin.job.remote.cli.password=
>
> # Used by test cases to prepare synthetic data for sample cube
> kylin.job.remote.cli.working.dir=/tmp/kylin
>
> # Max count of concurrent jobs running
> kylin.job.concurrent.max.limit=10
>
> # Time interval to check hadoop job status
> kylin.job.yarn.app.rest.check.interval.seconds=10
>
> # Hive database name for putting the intermediate flat tables
> kylin.job.hive.database.for.intermediatetable=default
>
> # The percentage of the sampling, default 100%
> kylin.job.cubing.inmem.sampling.percent=100
>
> # Whether get job status from resource manager with kerberos authentication
> kylin.job.status.with.kerberos=false
>
> kylin.job.mapreduce.default.reduce.input.mb=500
>
> kylin.job.mapreduce.max.reducer.number=500
>
> kylin.job.mapreduce.mapper.input.rows=1000000
>
> kylin.job.step.timeout=7200
>
> ### CUBE ###
>
> # 'auto', 'inmem', 'layer' or 'random' for testing
> kylin.cube.algorithm=auto
>
> kylin.cube.algorithm.auto.threshold=8
>
> kylin.cube.aggrgroup.max.combination=4096
>
> kylin.dictionary.max.cardinality=5000000
>
> kylin.table.snapshot.max_mb=300
>
> ### QUERY ###
>
> kylin.query.scan.threshold=10000000
>
> # 3G
> kylin.query.mem.budget=3221225472
>
> kylin.query.coprocessor.mem.gb=3
>
> # the default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds,
> # you can set it to a smaller value. 0 means use default.
> # kylin.query.coprocessor.timeout.seconds=0
>
> # Enable/disable ACL check for cube query
> kylin.query.security.enabled=true
>
> kylin.query.cache.enabled=true
>
> ### SECURITY ###
>
> # Spring security profile, options: testing, ldap, saml
> # with "testing" profile, user can use pre-defined name/
> pwd like KYLIN/ADMIN to login
> kylin.security.profile=testing
>
> ### SECURITY ###
> # Default roles and admin roles in LDAP, for ldap and saml
> acl.defaultRole=ROLE_ANALYST,ROLE_MODELER
> acl.adminRole=ROLE_ADMIN
>
> # LDAP authentication configuration
> ldap.server=ldap://ldap_server:389
> ldap.username=
> ldap.password=
>
> # LDAP user account directory;
> ldap.user.searchBase=
> ldap.user.searchPattern=
> ldap.user.groupSearchBase=
>
> # LDAP service account directory
> ldap.service.searchBase=
> ldap.service.searchPattern=
> ldap.service.groupSearchBase=
>
> ## SAML configurations for SSO
> # SAML IDP metadata file location
> saml.metadata.file=classpath:sso_metadata.xml
> saml.metadata.entityBaseURL=https://hostname/kylin
> saml.context.scheme=https
> saml.context.serverName=hostname
> saml.context.serverPort=443
> saml.context.contextPath=/kylin
>
> ### MAIL ###
>
> # If true, will send email notification;
> mail.enabled=false
> mail.host=
> mail.username=
> mail.password=
> mail.sender=
> ### WEB ###
>
> # Help info, format{name|displayName|link}, optional
> kylin.web.help.length=4
> kylin.web.help.0=start|Getting Started|
> kylin.web.help.1=odbc|ODBC Driver|
> kylin.web.help.2=tableau|Tableau Guide|
> kylin.web.help.3=onboard|Cube Design Tutorial|
>
> # Guide user how to build streaming cube
> kylin.web.streaming.guide=http://kylin.apache.org/
>
> # Hadoop url link, optional
> kylin.web.hadoop=
> #job diagnostic url link, optional
> kylin.web.diagnostic=
> #contact mail on web page, optional
> kylin.web.contact_mail=
>
> crossdomain.enable=true
>
> ### OTHER ###
>
> # Env DEV|QA|PROD
> deploy.env=QA
>
>
> The configuration of the other two query nodes is as follows:
> ### SERVICE ###
>
> # Kylin server mode, valid value [all, query, job]
> kyin.server.mode=query
>
> # Optional information for the owner of kylin platform, it can be your team's email
> # Currently it will be attached to each kylin's htable attribute
> kylin.owner=whoami@kylin.apache.org
>
> # List of web servers in use, this enables one web server instance to sync up with other servers.
> #kylin.rest.servers=localhost:7070
> kylin.rest.servers=prod-slave5:7070,prod-slave4:7070,prod-slave3:7070
>
> # Display timezone on UI,format like[GMT+N or GMT-N]
> kylin.rest.timezone=GMT+8
>
> ### SOURCE ###
>
> # Hive client, valid value [cli, beeline]
> kylin.hive.client=cli
>
> # Parameters for beeline client, only necessary if hive client is beeline
> #kylin.hive.beeline.params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u 'jdbc:hive2://localhost:10000'
>
> kylin.hive.keep.flat.table=false
>
> ### STORAGE ###
>
> # The metadata store in hbase
> kylin.metadata.url=kylin_metadata@hbase
>
> # The storage for final cube file in hbase
> kylin.storage.url=hbase
>
> # Working folder in HDFS, make sure user has the right access to the hdfs directory
> kylin.hdfs.working.dir=/kylin
>
> # Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4]
> kylin.hbase.default.compression.codec=none
>
> # HBase Cluster FileSystem, which serving hbase, format as hdfs://hbase-cluster:8020
> # Leave empty if hbase running on same cluster with hive and mapreduce
> #kylin.hbase.cluster.fs=
>
> # The cut size for hbase region, in GB.
> kylin.hbase.region.cut=5
>
> # The hfile size of GB, smaller hfile leading to the converting hfile MR has more reducers and be faster.
> # Set 0 to disable this optimization.
> kylin.hbase.hfile.size.gb=2
>
> kylin.hbase.region.count.min=1
> kylin.hbase.region.count.max=500
>
> ### JOB ###
>
> # max job retry on error, default 0: no retry
> kylin.job.retry=0
>
> # If true, job engine will not assume that hadoop CLI reside on the same server as it self
> # you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password
> # It should not be set to "true" unless you're NOT running Kylin.sh on a hadoop client machine
> # (Thus kylin instance has to ssh to another real hadoop client machine to execute hbase,hive,hadoop commands)
> kylin.job.run.as.remote.cmd=false
>
> # Only necessary when kylin.job.run.as.remote.cmd=true
> kylin.job.remote.cli.hostname=
> kylin.job.remote.cli.port=22
>
> # Only necessary when kylin.job.run.as.remote.cmd=true
> kylin.job.remote.cli.username=
>
> # Only necessary when kylin.job.run.as.remote.cmd=true
> kylin.job.remote.cli.password=
>
> # Used by test cases to prepare synthetic data for sample cube
> kylin.job.remote.cli.working.dir=/tmp/kylin
>
> # Max count of concurrent jobs running
> kylin.job.concurrent.max.limit=10
>
> # Time interval to check hadoop job status
> kylin.job.yarn.app.rest.check.interval.seconds=10
>
> # Hive database name for putting the intermediate flat tables
> kylin.job.hive.database.for.intermediatetable=default
>
> # The percentage of the sampling, default 100%
> kylin.job.cubing.inmem.sampling.percent=100
>
> # Whether get job status from resource manager with kerberos authentication
> kylin.job.status.with.kerberos=false
>
> kylin.job.mapreduce.default.reduce.input.mb=500
>
> kylin.job.mapreduce.max.reducer.number=500
>
> kylin.job.mapreduce.mapper.input.rows=1000000
>
> kylin.job.step.timeout=7200
>
> ### CUBE ###
>
> # 'auto', 'inmem', 'layer' or 'random' for testing
> kylin.cube.algorithm=auto
>
> kylin.cube.algorithm.auto.threshold=8
>
> kylin.cube.aggrgroup.max.combination=4096
>
> kylin.dictionary.max.cardinality=5000000
>
> kylin.table.snapshot.max_mb=300
>
> ### QUERY ###
>
> kylin.query.scan.threshold=10000000
>
> # 3G
> kylin.query.mem.budget=3221225472
>
> kylin.query.coprocessor.mem.gb=3
>
> # the default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds,
> # you can set it to a smaller value. 0 means use default.
> # kylin.query.coprocessor.timeout.seconds=0
>
> # Enable/disable ACL check for cube query
> kylin.query.security.enabled=true
>
> kylin.query.cache.enabled=true
>
> ### SECURITY ###
>
> # Spring security profile, options: testing, ldap, saml
> # with "testing" profile, user can use pre-defined name/
> pwd like KYLIN/ADMIN to login
> kylin.security.profile=testing
>
> ### SECURITY ###
> # Default roles and admin roles in LDAP, for ldap and saml
> acl.defaultRole=ROLE_ANALYST,ROLE_MODELER
> acl.adminRole=ROLE_ADMIN
>
> # LDAP authentication configuration
> ldap.server=ldap://ldap_server:389
> ldap.username=
> ldap.password=
>
> # LDAP user account directory;
> ldap.user.searchBase=
> ldap.user.searchPattern=
> ldap.user.groupSearchBase=
>
> # LDAP service account directory
> ldap.service.searchBase=
> ldap.service.searchPattern=
> ldap.service.groupSearchBase=
>
> ## SAML configurations for SSO
> # SAML IDP metadata file location
> saml.metadata.file=classpath:sso_metadata.xml
> saml.metadata.entityBaseURL=https://hostname/kylin
> saml.context.scheme=https
> saml.context.serverName=hostname
> saml.context.serverPort=443
> saml.context.contextPath=/kylin
>
> ### MAIL ###
>
> # If true, will send email notification;
> mail.enabled=false
> mail.host=
> mail.username=
> mail.password=
> mail.sender=
> ### WEB ###
>
> # Help info, format{name|displayName|link}, optional
> kylin.web.help.length=4
> kylin.web.help.0=start|Getting Started|
> kylin.web.help.1=odbc|ODBC Driver|
> kylin.web.help.2=tableau|Tableau Guide|
> kylin.web.help.3=onboard|Cube Design Tutorial|
>
> # Guide user how to build streaming cube
> kylin.web.streaming.guide=http://kylin.apache.org/
>
> # Hadoop url link, optional
> kylin.web.hadoop=
> #job diagnostic url link, optional
> kylin.web.diagnostic=
> #contact mail on web page, optional
> kylin.web.contact_mail=
>
> crossdomain.enable=true
>
> ### OTHER ###
>
> # Env DEV|QA|PROD
> deploy.env=QA
>
>
> Why do newly created cubes sometimes fail to sync to the cluster's two query nodes? Every time a cube is deleted,
> both query nodes run into problems and only a restart recovers them. Could you please check whether anything is wrong with this configuration?
>
> ------------------------------
>
> Chen Ping, DBA Engineer
>
> Chengdu Shishike Technology Co., Ltd. (Keruyun)
>
> Address: 3rd Floor, Building 1, No. 1268 Tianfu Avenue, High-tech Zone, Chengdu
>
> Postal code: 610041
>
> Mobile: 15108456581
>
> Online: QQ 625852056
>
> Website: www.keruyun.com
>
> Customer service: 4006-315-666
>
>
>
