mesos-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "haosdent (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (MESOS-2539) ExamplesTest.LowLevelSchedulerLibprocess is flaky
Date Wed, 29 Apr 2015 13:07:07 GMT

    [ https://issues.apache.org/jira/browse/MESOS-2539?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14519321#comment-14519321
] 

haosdent commented on MESOS-2539:
---------------------------------

I used gdb to trace the error on CentOS. It seems this code has a problem:
{code}
signaledWrapper = defer(self(), &Slave::signaled, lambda::_1, lambda::_2);
{code}

The stack is 
{code}
(gdb) where
#0  0x0000003056a32635 in raise () from /lib64/libc.so.6
#1  0x0000003056a33e15 in abort () from /lib64/libc.so.6
#2  0x0000003056a70547 in __libc_message () from /lib64/libc.so.6
#3  0x0000003056a75e76 in malloc_printerr () from /lib64/libc.so.6
#4  0x00007ffff6254d5f in _Base_manager<process::_Defer<void (*(process::PID<mesos::internal::slave::Slave>,
void (mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>, std::tr1::_Placeholder<2>))(const
process::PID<mesos::internal::slave::Slave>&, void (mesos::internal::slave::Slave::*)(int,
int), int, int)> >::_M_destroy(std::tr1::_Any_data &, std::tr1::false_type) (__victim=...)
   at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1523
#5  0x00007ffff62471ee in _Base_manager<process::_Defer<void (*(process::PID<mesos::internal::slave::Slave>,
void (mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>, std::tr1::_Placeholder<2>))(const
process::PID<mesos::internal::slave::Slave>&, void (mesos::internal::slave::Slave::*)(int,
int), int, int)> >::_M_manager(std::tr1::_Any_data &, const std::tr1::_Any_data
&, std::tr1::_Manager_operation) (__dest=..., __source=..., __op=std::tr1::__destroy_functor)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1547
#6  0x0000000000436051 in std::tr1::_Function_base::~_Function_base (this=0x7ffff07d0850,
__in_chrg=<value optimized out>)
    at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1628
#7  0x00007ffff6224348 in std::tr1::function<void(int, int)>::~function(void) (this=0x7ffff07d0850,
__in_chrg=<value optimized out>)
    at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1463
#8  0x00007ffff62243cf in std::tr1::function<void(int, int)>::operator=<process::_Defer<void
(*(process::PID<mesos::internal::slave::Slave>, void (mesos::internal::slave::Slave::*)(int,
int), std::tr1::_Placeholder<1>, std::tr1::_Placeholder<2>))(const process::PID<mesos::internal::slave::Slave>&,
void (mesos::internal::slave::Slave::*)(int, int), int, int)> >(process::_Defer<void
(*(process::PID<mesos::internal::slave::Slave>, void (mesos::internal::slave::Slave::*)(int,
int), std::tr1::_Placeholder<1>, std::tr1::_Placeholder<2>))(const process::PID<mesos::internal::slave::Slave>&,
void (mesos::internal::slave::Slave::*)(int, int), int, int)>) (this=0x7ffff77e8440, __f=...)
at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1885
#9  0x00007ffff61f13ec in mesos::internal::slave::Slave::initialize (this=0x717d40) at ../../src/slave/slave.cpp:491
#10 0x00007ffff68bfa4f in resume () from /home/ld-sgdev/huangh/mesos/build/src/.libs/libmesos-0.23.0.so
#11 0x00007ffff68b44bd in schedule () from /home/ld-sgdev/huangh/mesos/build/src/.libs/libmesos-0.23.0.so
#12 0x00000030572079d1 in start_thread () from /lib64/libpthread.so.0
#13 0x0000003056ae886d in clone () from /lib64/libc.so.6
{code}

{code}
(gdb) frame 9
#9  0x00007ffff61f13ec in mesos::internal::slave::Slave::initialize (this=0x717d40) at ../../src/slave/slave.cpp:491
491       signaledWrapper = defer(self(), &Slave::signaled, lambda::_1, lambda::_2);
(gdb) list
486
487       // The SA_SIGINFO flag tells sigaction() to use
488       // the sa_sigaction field, not sa_handler.
489       action.sa_flags = SA_SIGINFO;
490
491       signaledWrapper = defer(self(), &Slave::signaled, lambda::_1, lambda::_2);
492
493       action.sa_sigaction = signalHandler;
494
495       if (sigaction(SIGUSR1, &action, NULL) < 0) {
{code}

{code}
(gdb) frame 4
#4  0x00007ffff6254d5f in _Base_manager<process::_Defer<void (*(process::PID<mesos::internal::slave::Slave>,
void (mesos::internal::slave::Slave::*)(int, int), std::tr1::_Placeholder<1>, std::tr1::_Placeholder<2>))(const
process::PID<mesos::internal::slave::Slave>&, void (mesos::internal::slave::Slave::*)(int,
int), int, int)> >::_M_destroy(std::tr1::_Any_data &, std::tr1::false_type) (__victim=...)
   at /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../include/c++/4.4.7/tr1_impl/functional:1523
1523              delete __victim._M_access<_Functor*>();
(gdb) list
1518
1519            // Destroying an object located on the heap.
1520            static void
1521            _M_destroy(_Any_data& __victim, false_type)
1522            {
1523              delete __victim._M_access<_Functor*>();
1524            }
1525
1526          public:
1527            static bool
{code}

But I still do not understand why {code} signaledWrapper = defer(self(), &Slave::signaled,
lambda::_1, lambda::_2); {code} would cause {code}delete __victim._M_access<_Functor*>();{code}

> ExamplesTest.LowLevelSchedulerLibprocess is flaky
> -------------------------------------------------
>
>                 Key: MESOS-2539
>                 URL: https://issues.apache.org/jira/browse/MESOS-2539
>             Project: Mesos
>          Issue Type: Bug
>    Affects Versions: 0.22.0, 0.23.0
>            Reporter: Jie Yu
>
> Centos6 gcc-44
> sudo make check
> {noformat}
> [ RUN      ] ExamplesTest.LowLevelSchedulerLibprocess
> 2015-03-24 19:54:54,995:5735(0x7fc007fff700):ZOO_ERROR@handle_socket_error_msg@1697:
Socket [127.0.0.1:37590] zk retcode=-4, errno
> =111(Connection refused): server refused to accept the client
> *** glibc detected *** /home/jyu/workspace/mesos-dist/build/src/.libs: double free or
corruption (fasttop): 0x00007f7f6c003150 ***
> ======= Backtrace: =========
> /lib64/libc.so.6(+0x75e66)[0x7f7f8b79ee66]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr114_Function_base13_Base_managerIN7process6_DeferIFPFvRK
> NS2_3PIDIN5mesos8internal5slave5SlaveEEEMS8_FviiEiiES9_SD_NS_12_PlaceholderILi1EEENSG_ILi2EEEEEEE10_M_destroyERNS_9_Any_dataENS_17
> integral_constantIbLb0EEE+0x31)[0x7f7f8ecef16b]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr114_Function_base13_Base_managerIN7process6_DeferIFPFvRK
> NS2_3PIDIN5mesos8internal5slave5SlaveEEEMS8_FviiEiiES9_SD_NS_12_PlaceholderILi1EEENSG_ILi2EEEEEEE10_M_managerERNS_9_Any_dataERKSM_
> NS_18_Manager_operationE+0x92)[0x7f7f8ece17c0]
> /home/jyu/workspace/mesos-dist/build/src/.libs(_ZNSt3tr114_Function_baseD1Ev+0x37)[0x45107d]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr18functionIFviiEED1Ev+0x18)[0x7f7f8ecbeb34]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZNSt3tr18functionIFviiEEaSIN7process6_DeferIFPFvRKNS4_3PIDIN5me
> sos8internal5slave5SlaveEEEMSA_FviiEiiESB_SF_NS_12_PlaceholderILi1EEENSI_ILi2EEEEEEEEN9__gnu_cxx11__enable_ifIXntsrNS_11is_integra
> lIT_EE5valueERS2_E6__typeESQ_+0x85)[0x7f7f8ecbebbb]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZN5mesos8internal5slave5Slave10initializeEv+0x31bb)[0x7f7f8ec8b
> f99]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZN7process14ProcessManager6resumeEPNS_11ProcessBaseE+0x299)[0x7
> f7f8f3bf007]
> /home/jyu/workspace/mesos-dist/build/src/.libs/libmesos-0.23.0.so(_ZN7process8scheduleEPv+0x91)[0x7f7f8f3b3a75]
> /lib64/libpthread.so.0(+0x79d1)[0x7f7f8c2649d1]
> /lib64/libc.so.6(clone+0x6d)[0x7f7f8b8118fd]
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message