flink-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Simone Robutti <simone.robu...@radicalbit.io>
Subject Error related to JMX and metrics
Date Wed, 25 May 2016 12:22:43 GMT
Running a job I encounter the following error that leads the job to failure
in the end.

14:09:34,722 ERROR org.apache.flink.metrics.reporter.JMXReporter
      - A metric with the name
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numBytesIn
was already registered.
javax.management.InstanceAlreadyExistsException:
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numBytesIn
at com.sun.jmx.mbeanserver.Repository.addMBean(Repository.java:437)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerWithRepository(DefaultMBeanServerInterceptor.java:1898)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerDynamicMBean(DefaultMBeanServerInterceptor.java:966)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerObject(DefaultMBeanServerInterceptor.java:900)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerMBean(DefaultMBeanServerInterceptor.java:324)
at
com.sun.jmx.mbeanserver.JmxMBeanServer.registerMBean(JmxMBeanServer.java:522)
at
org.apache.flink.metrics.reporter.JMXReporter.notifyOfAddedMetric(JMXReporter.java:76)
at org.apache.flink.metrics.MetricRegistry.register(MetricRegistry.java:177)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.addMetric(AbstractMetricGroup.java:191)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.counter(AbstractMetricGroup.java:144)
at
org.apache.flink.metrics.groups.IOMetricGroup.<init>(IOMetricGroup.java:40)
at
org.apache.flink.metrics.groups.TaskMetricGroup.<init>(TaskMetricGroup.java:68)
at
org.apache.flink.metrics.groups.JobMetricGroup.addTask(JobMetricGroup.java:74)
at
org.apache.flink.metrics.groups.TaskManagerMetricGroup.addTaskForJob(TaskManagerMetricGroup.java:86)
at
org.apache.flink.runtime.taskmanager.TaskManager.submitTask(TaskManager.scala:1092)
at org.apache.flink.runtime.taskmanager.TaskManager.org
$apache$flink$runtime$taskmanager$TaskManager$$handleTaskMessage(TaskManager.scala:441)
at
org.apache.flink.runtime.taskmanager.TaskManager$$anonfun$handleMessage$1.applyOrElse(TaskManager.scala:283)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at
org.apache.flink.runtime.LeaderSessionMessageFilter$$anonfun$receive$1.applyOrElse(LeaderSessionMessageFilter.scala:36)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:33)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:28)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118)
at
org.apache.flink.runtime.LogMessages$$anon$1.applyOrElse(LogMessages.scala:28)
at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
at
org.apache.flink.runtime.taskmanager.TaskManager.aroundReceive(TaskManager.scala:124)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
at akka.actor.ActorCell.invoke(ActorCell.scala:487)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:254)
at akka.dispatch.Mailbox.run(Mailbox.scala:221)
at akka.dispatch.Mailbox.exec(Mailbox.scala:231)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
14:09:34,728 ERROR org.apache.flink.metrics.reporter.JMXReporter
      - A metric with the name
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numBytesOut
was already registered.
javax.management.InstanceAlreadyExistsException:
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numBytesOut
at com.sun.jmx.mbeanserver.Repository.addMBean(Repository.java:437)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerWithRepository(DefaultMBeanServerInterceptor.java:1898)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerDynamicMBean(DefaultMBeanServerInterceptor.java:966)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerObject(DefaultMBeanServerInterceptor.java:900)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerMBean(DefaultMBeanServerInterceptor.java:324)
at
com.sun.jmx.mbeanserver.JmxMBeanServer.registerMBean(JmxMBeanServer.java:522)
at
org.apache.flink.metrics.reporter.JMXReporter.notifyOfAddedMetric(JMXReporter.java:76)
at org.apache.flink.metrics.MetricRegistry.register(MetricRegistry.java:177)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.addMetric(AbstractMetricGroup.java:191)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.counter(AbstractMetricGroup.java:144)
at
org.apache.flink.metrics.groups.IOMetricGroup.<init>(IOMetricGroup.java:41)
at
org.apache.flink.metrics.groups.TaskMetricGroup.<init>(TaskMetricGroup.java:68)
at
org.apache.flink.metrics.groups.JobMetricGroup.addTask(JobMetricGroup.java:74)
at
org.apache.flink.metrics.groups.TaskManagerMetricGroup.addTaskForJob(TaskManagerMetricGroup.java:86)
at
org.apache.flink.runtime.taskmanager.TaskManager.submitTask(TaskManager.scala:1092)
at org.apache.flink.runtime.taskmanager.TaskManager.org
$apache$flink$runtime$taskmanager$TaskManager$$handleTaskMessage(TaskManager.scala:441)
at
org.apache.flink.runtime.taskmanager.TaskManager$$anonfun$handleMessage$1.applyOrElse(TaskManager.scala:283)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at
org.apache.flink.runtime.LeaderSessionMessageFilter$$anonfun$receive$1.applyOrElse(LeaderSessionMessageFilter.scala:36)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:33)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:28)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118)
at
org.apache.flink.runtime.LogMessages$$anon$1.applyOrElse(LogMessages.scala:28)
at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
at
org.apache.flink.runtime.taskmanager.TaskManager.aroundReceive(TaskManager.scala:124)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
at akka.actor.ActorCell.invoke(ActorCell.scala:487)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:254)
at akka.dispatch.Mailbox.run(Mailbox.scala:221)
at akka.dispatch.Mailbox.exec(Mailbox.scala:231)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
14:09:34,730 ERROR org.apache.flink.metrics.reporter.JMXReporter
      - A metric with the name
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numRecordsIn
was already registered.
javax.management.InstanceAlreadyExistsException:
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numRecordsIn
at com.sun.jmx.mbeanserver.Repository.addMBean(Repository.java:437)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerWithRepository(DefaultMBeanServerInterceptor.java:1898)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerDynamicMBean(DefaultMBeanServerInterceptor.java:966)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerObject(DefaultMBeanServerInterceptor.java:900)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerMBean(DefaultMBeanServerInterceptor.java:324)
at
com.sun.jmx.mbeanserver.JmxMBeanServer.registerMBean(JmxMBeanServer.java:522)
at
org.apache.flink.metrics.reporter.JMXReporter.notifyOfAddedMetric(JMXReporter.java:76)
at org.apache.flink.metrics.MetricRegistry.register(MetricRegistry.java:177)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.addMetric(AbstractMetricGroup.java:191)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.counter(AbstractMetricGroup.java:144)
at
org.apache.flink.metrics.groups.IOMetricGroup.<init>(IOMetricGroup.java:42)
at
org.apache.flink.metrics.groups.TaskMetricGroup.<init>(TaskMetricGroup.java:68)
at
org.apache.flink.metrics.groups.JobMetricGroup.addTask(JobMetricGroup.java:74)
at
org.apache.flink.metrics.groups.TaskManagerMetricGroup.addTaskForJob(TaskManagerMetricGroup.java:86)
at
org.apache.flink.runtime.taskmanager.TaskManager.submitTask(TaskManager.scala:1092)
at org.apache.flink.runtime.taskmanager.TaskManager.org
$apache$flink$runtime$taskmanager$TaskManager$$handleTaskMessage(TaskManager.scala:441)
at
org.apache.flink.runtime.taskmanager.TaskManager$$anonfun$handleMessage$1.applyOrElse(TaskManager.scala:283)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at
org.apache.flink.runtime.LeaderSessionMessageFilter$$anonfun$receive$1.applyOrElse(LeaderSessionMessageFilter.scala:36)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:33)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:28)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118)
at
org.apache.flink.runtime.LogMessages$$anon$1.applyOrElse(LogMessages.scala:28)
at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
at
org.apache.flink.runtime.taskmanager.TaskManager.aroundReceive(TaskManager.scala:124)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
at akka.actor.ActorCell.invoke(ActorCell.scala:487)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:254)
at akka.dispatch.Mailbox.run(Mailbox.scala:221)
at akka.dispatch.Mailbox.exec(Mailbox.scala:231)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
14:09:34,732 ERROR org.apache.flink.metrics.reporter.JMXReporter
      - A metric with the name
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numRecordsOut
was already registered.
javax.management.InstanceAlreadyExistsException:
org.apache.flink.metrics:key0=localhost,key1=taskmanager,key2=04a8fea67d01fbbdab5bba8c7e9a73d8,key3=Flink_Java_Job_at_Wed_May_25_14-09-14_CEST_2016,key4=<task_name>,name=numRecordsOut
at com.sun.jmx.mbeanserver.Repository.addMBean(Repository.java:437)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerWithRepository(DefaultMBeanServerInterceptor.java:1898)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerDynamicMBean(DefaultMBeanServerInterceptor.java:966)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerObject(DefaultMBeanServerInterceptor.java:900)
at
com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerMBean(DefaultMBeanServerInterceptor.java:324)
at
com.sun.jmx.mbeanserver.JmxMBeanServer.registerMBean(JmxMBeanServer.java:522)
at
org.apache.flink.metrics.reporter.JMXReporter.notifyOfAddedMetric(JMXReporter.java:76)
at org.apache.flink.metrics.MetricRegistry.register(MetricRegistry.java:177)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.addMetric(AbstractMetricGroup.java:191)
at
org.apache.flink.metrics.groups.AbstractMetricGroup.counter(AbstractMetricGroup.java:144)
at
org.apache.flink.metrics.groups.IOMetricGroup.<init>(IOMetricGroup.java:43)
at
org.apache.flink.metrics.groups.TaskMetricGroup.<init>(TaskMetricGroup.java:68)
at
org.apache.flink.metrics.groups.JobMetricGroup.addTask(JobMetricGroup.java:74)
at
org.apache.flink.metrics.groups.TaskManagerMetricGroup.addTaskForJob(TaskManagerMetricGroup.java:86)
at
org.apache.flink.runtime.taskmanager.TaskManager.submitTask(TaskManager.scala:1092)
at org.apache.flink.runtime.taskmanager.TaskManager.org
$apache$flink$runtime$taskmanager$TaskManager$$handleTaskMessage(TaskManager.scala:441)
at
org.apache.flink.runtime.taskmanager.TaskManager$$anonfun$handleMessage$1.applyOrElse(TaskManager.scala:283)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at
org.apache.flink.runtime.LeaderSessionMessageFilter$$anonfun$receive$1.applyOrElse(LeaderSessionMessageFilter.scala:36)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply$mcVL$sp(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:33)
at
scala.runtime.AbstractPartialFunction$mcVL$sp.apply(AbstractPartialFunction.scala:25)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:33)
at org.apache.flink.runtime.LogMessages$$anon$1.apply(LogMessages.scala:28)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:118)
at
org.apache.flink.runtime.LogMessages$$anon$1.applyOrElse(LogMessages.scala:28)
at akka.actor.Actor$class.aroundReceive(Actor.scala:465)
at
org.apache.flink.runtime.taskmanager.TaskManager.aroundReceive(TaskManager.scala:124)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
at akka.actor.ActorCell.invoke(ActorCell.scala:487)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:254)
at akka.dispatch.Mailbox.run(Mailbox.scala:221)
at akka.dispatch.Mailbox.exec(Mailbox.scala:231)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)


Here is the interesting part of the source

val distCOO=env.fromCollection(coo.toStream)
    val distCOO2=env.fromCollection(coo.toStream)
    val m1=DistributedRowMatrix.fromCOO(distCOO,numRows.toInt,numCols.toInt)
    val
m2=DistributedRowMatrix.fromCOO(distCOO2,numRows.toInt,numCols.toInt)

m1.toBlockMatrix(1000,1000).multiply(m2.toBlockMatrix(1000,1000)).toRowMatrix.getRowData.first(5).print()


"coo" is a  Iterator[(Int, Int, Double)].

Here you can find the implementation for the distributed matrix I'm working
on, if you need to follow the stacktrace:
https://github.com/radicalbit/flink/tree/Flink-1873-d/flink-libraries/flink-ml/src/main/scala/org/apache/flink/ml/math/distributed

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message