ambari-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Ashok (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (AMBARI-12628) When HDFS HA enabled with Ambari 2.1, several service failed to start
Date Wed, 21 Oct 2015 06:49:27 GMT

    [ https://issues.apache.org/jira/browse/AMBARI-12628?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14966331#comment-14966331
] 

Ashok commented on AMBARI-12628:
--------------------------------

I hit the same issue. Is there a temporary workaround to solve this?

This is what my error looks like:

Traceback (most recent call last):
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py",
line 40, in get_value_from_jmx
    data_dict = json.loads(data)
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 335, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in raw_decode
    raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
Traceback (most recent call last):
  File "/var/lib/ambari-agent/cache/stacks/BigInsights/4.0/services/HBASE/package/scripts/hbase_regionserver.py",
line 131, in <module>
    HbaseRegionServer().execute()
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py",
line 218, in execute
    method(env)
  File "/var/lib/ambari-agent/cache/stacks/BigInsights/4.0/services/HBASE/package/scripts/hbase_regionserver.py",
line 58, in start
    self.configure(env) # for security
  File "/var/lib/ambari-agent/cache/stacks/BigInsights/4.0/services/HBASE/package/scripts/hbase_regionserver.py",
line 43, in configure
    hbase(name='regionserver')
  File "/var/lib/ambari-agent/cache/stacks/BigInsights/4.0/services/HBASE/package/scripts/hbase.py",
line 169, in hbase
    owner=params.hbase_user
  File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line 157, in __init__
    self.env.run()
  File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 152,
in run
    self.run_action(resource, action)
  File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 118,
in run_action
    provider_action()
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 390, in action_create_on_execute
    self.action_delayed("create")
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 387, in action_delayed
    self.get_hdfs_resource_executor().action_delayed(action_name, self)
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 239, in action_delayed
    main_resource.resource.security_enabled, main_resource.resource.logoutput)
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 126, in __init__
    security_enabled, run_user)
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/namenode_ha_utils.py",
line 113, in get_property_for_active_namenode
    raise Fail("There is no active namenodes.")
resource_management.core.exceptions.Fail: There is no active namenodes.



> When HDFS HA enabled with Ambari 2.1, several service failed to start 
> ----------------------------------------------------------------------
>
>                 Key: AMBARI-12628
>                 URL: https://issues.apache.org/jira/browse/AMBARI-12628
>             Project: Ambari
>          Issue Type: Bug
>          Components: ambari-server
>    Affects Versions: 2.1.0
>            Reporter: Vincent.He
>            Priority: Critical
>
> Installed Ambari 2.1 with HDP 2.3. When HA was enabled for HDFS, several services failed to start,
such as the MapReduce history server. Digging into more detail, it is different from issue AMBARI-12374.
> The issue reported is a failure to decode the JSON string:
> 2015-08-03 02:09:35,420 - Getting jmx metrics from NN failed. URL: http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> Traceback (most recent call last):
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py",
line 40, in get_value_from_jmx
>     data_dict = json.loads(data)
>   File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307, in
loads
>     return _default_decoder.decode(s)
>   File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 335, in
decode
>     obj, end = self.raw_decode(s, idx=_w(s, 0).end())
>   File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in
raw_decode
>     raise ValueError("No JSON object could be decoded")
> ValueError: No JSON object could be decoded
> 2015-08-03 02:09:35,494 - Getting jmx metrics from NN failed. URL: http://h02.bigdata.lenovo.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> Traceback (most recent call last):
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py",
line 40, in get_value_from_jmx
>     data_dict = json.loads(data)
>   File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307, in
loads
>     return _default_decoder.decode(s)
>   File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 335, in
decode
>     obj, end = self.raw_decode(s, idx=_w(s, 0).end())
>   File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in
raw_decode
>     raise ValueError("No JSON object could be decoded")
> ValueError: No JSON object could be decoded
> Traceback (most recent call last):
>   File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py",
line 168, in <module>
>     HistoryServer().execute()
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py",
line 218, in execute
>     method(env)
>   File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py",
line 91, in start
>     self.configure(env) # FOR SECURITY
>   File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py",
line 55, in configure
>     yarn(name="historyserver")
>   File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py", line 89,
in thunk
>     return fn(*args, **kwargs)
>   File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/yarn.py",
line 72, in yarn
>     recursive_chmod=True
>   File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line 157,
in __init__
>     self.env.run()
>   File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line
152, in run
>     self.run_action(resource, action)
>   File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line
118, in run_action
>     provider_action()
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 390, in action_create_on_execute
>     self.action_delayed("create")
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 387, in action_delayed
>     self.get_hdfs_resource_executor().action_delayed(action_name, self)
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 239, in action_delayed
>     main_resource.resource.security_enabled, main_resource.resource.logoutput)
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py",
line 126, in __init__
>     security_enabled, run_user)
>   File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/namenode_ha_utils.py",
line 113, in get_property_for_active_namenode
>     raise Fail("There is no active namenodes.")
> resource_management.core.exceptions.Fail: There is no active namenodes.
> The key issue is "File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py",
line 353, in raw_decode
>     raise ValueError("No JSON object could be decoded")
> "
> The output I got is:
> [root@h02 patch]# curl -s http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> {
>   "beans" : [ {
>     "name" : "Hadoop:service=NameNode,name=NameNodeStatus",
>     "modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
>     "State" : "standby",
>     "NNRole" : "NameNode",
>     "HostAndPort" : "h03.bigdata.com:8020",
>     "SecurityEnabled" : false,
>     "LastHATransitionTime" : 1438594046119
>   } ]
> }
> [root@h02 patch]# curl -s http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> {
>   "beans" : [ {
>     "name" : "Hadoop:service=NameNode,name=NameNodeStatus",
>     "modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
>     "State" : "active",
>     "NNRole" : "NameNode",
>     "HostAndPort" : "h02.bigdata.com:8020",
>     "SecurityEnabled" : false,
>     "LastHATransitionTime" : 1438594046591
>   } ]
> }
> I also tried the patch in issue AMBARI-12374 and got the same error. From the new URI, I
got a response:
> [root@h02 patch]# curl -s http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
> {
>   "beans" : [ {
>     "name" : "Hadoop:service=NameNode,name=FSNamesystem",
>     "modelerType" : "FSNamesystem",
>     "tag.Context" : "dfs",
>     "tag.HAState" : "standby",
>     "tag.Hostname" : "h03.bigdata.com",
>     "MissingBlocks" : 0,
>     "MissingReplOneBlocks" : 0,
>     "ExpiredHeartbeats" : 0,
>     "TransactionsSinceLastCheckpoint" : -756,
>     "TransactionsSinceLastLogRoll" : 0,
>     "LastWrittenTransactionId" : 5760,
>     "LastCheckpointTime" : 1438637246806,
>     "CapacityTotal" : 377945479446528,
>     "CapacityTotalGB" : 351989.0,
>     "CapacityUsed" : 2162847744,
>     "CapacityUsedGB" : 2.0,
>     "CapacityRemaining" : 374078076620800,
>     "CapacityRemainingGB" : 348387.0,
>     "CapacityUsedNonDFS" : 3865239977984,
>     "TotalLoad" : 16,
>     "SnapshottableDirectories" : 0,
>     "Snapshots" : 0,
>     "BlocksTotal" : 588,
>     "FilesTotal" : 825,
>     "PendingReplicationBlocks" : 0,
>     "UnderReplicatedBlocks" : 0,
>     "CorruptBlocks" : 0,
>     "ScheduledReplicationBlocks" : 0,
>     "PendingDeletionBlocks" : 0,
>     "ExcessBlocks" : 0,
>     "PostponedMisreplicatedBlocks" : 0,
>     "PendingDataNodeMessageCount" : 0,
>     "MillisSinceLastLoadedEdits" : 49071,
>     "BlockCapacity" : 2097152,
>     "StaleDataNodes" : 0,
>     "TotalFiles" : 825
>   } ]
> }
> [root@h02 patch]# curl -s http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
> {
>   "beans" : [ {
>     "name" : "Hadoop:service=NameNode,name=FSNamesystem",
>     "modelerType" : "FSNamesystem",
>     "tag.Context" : "dfs",
>     "tag.HAState" : "active",
>     "tag.Hostname" : "h02.bigdata.com",
>     "MissingBlocks" : 0,
>     "MissingReplOneBlocks" : 0,
>     "ExpiredHeartbeats" : 0,
>     "TransactionsSinceLastCheckpoint" : 227,
>     "TransactionsSinceLastLogRoll" : 1,
>     "LastWrittenTransactionId" : 6743,
>     "LastCheckpointTime" : 1438637246983,
>     "CapacityTotal" : 377945479446528,
>     "CapacityTotalGB" : 351989.0,
>     "CapacityUsed" : 2162847744,
>     "CapacityUsedGB" : 2.0,
>     "CapacityRemaining" : 374078076620800,
>     "CapacityRemainingGB" : 348387.0,
>     "CapacityUsedNonDFS" : 3865239977984,
>     "TotalLoad" : 16,
>     "SnapshottableDirectories" : 0,
>     "Snapshots" : 0,
>     "BlocksTotal" : 588,
>     "FilesTotal" : 825,
>     "PendingReplicationBlocks" : 0,
>     "UnderReplicatedBlocks" : 0,
>     "CorruptBlocks" : 0,
>     "ScheduledReplicationBlocks" : 0,
>     "PendingDeletionBlocks" : 0,
>     "ExcessBlocks" : 0,
>     "PostponedMisreplicatedBlocks" : 0,
>     "PendingDataNodeMessageCount" : 0,
>     "MillisSinceLastLoadedEdits" : 0,
>     "BlockCapacity" : 2097152,
>     "StaleDataNodes" : 0,
>     "TotalFiles" : 825
>   } ]
> }



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message