Now I really need help from the community.
I have tried to make the worker use thread-local (or simply local) storage instead of global state, but even though I have updated everything I think needed updating, concurrent workers still fail with various memory errors such as:
Summary
Thread 101 "data_consumer::" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffe6f5fc640 (LWP 2791843)]
0x00005555562fa7c9 in sctp_find_ifn (ifn=0x0, ifn_index=4294967295) at ../deps/usrsctp/usrsctp/usrsctplib/netinet/sctp_pcb.c:265
265 if (sctp_ifnp->ifn_index == ifn_index) {
(gdb) print sctp_ifnp
$1 = (struct sctp_ifn *) 0x6953726566667542
(gdb) print sctp_ifnp->ifn_index
Cannot access memory at address 0x6953726566667582
(gdb) bt
#0 0x00005555562fa7c9 in sctp_find_ifn (ifn=0x0, ifn_index=4294967295) at ../deps/usrsctp/usrsctp/usrsctplib/netinet/sctp_pcb.c:265
#1 0x00005555562faf55 in sctp_add_addr_to_vrf (vrf_id=0, ifn=0x0, ifn_index=4294967295, ifn_type=0, if_name=0x555556758320 "conn", ifa=0x0, addr=0x7ffe6f5d7610, ifa_flags=0, dynamic_add=0) at ../deps/usrsctp/usrsctp/usrsctplib/netinet/sctp_pcb.c:569
#2 0x000055555616cb00 in usrsctp_register_address (addr=0x1) at ../deps/usrsctp/usrsctp/usrsctplib/user_socket.c:3153
#3 0x0000555555fd39c0 in RTC::SctpAssociation::SctpAssociation (this=0x7ffe5c005340, listener=0x7ffe5c002ac0, os=1024, mis=1024, maxSctpMessageSize=262144, sctpSendBufferSize=262144, isDataChannel=true) at ../src/RTC/SctpAssociation.cpp:128
#4 0x0000555555fd8e03 in RTC::Transport::Transport (this=0x7ffe5c002aa0, id=..., listener=0x7ffe5c00ae00, data=...) at ../src/RTC/Transport.cpp:172
#5 0x0000555555ffd881 in RTC::WebRtcTransport::WebRtcTransport (this=0x7ffe5c002aa0, id=..., listener=0x7ffe5c00ae00, data=...) at ../src/RTC/WebRtcTransport.cpp:32
#6 0x0000555555fafa2a in RTC::Router::HandleRequest (this=0x7ffe5c00ae00, request=0x7ffe5c0081d0) at ../src/RTC/Router.cpp:188
#7 0x0000555555f6e951 in Worker::OnChannelRequest (this=0x7ffe6f5db320, request=0x7ffe5c0081d0) at ../src/Worker.cpp:285
#8 0x0000555555f85e80 in Channel::UnixStreamSocket::OnConsumerSocketMessage (this=0x7ffe5c000bc0,
msg=0x7ffe1d9f807f "{\"id\":1,\"method\":\"router.createWebRtcTransport\",\"data\":{\"enableSctp\":true,\"enableTcp\":false,\"enableUdp\":true,\"initialAvailableOutgoingBitrate\":600000,\"isDataChannel\":true,\"listenIps\":[{\"ip\":\"127.0.0.1"..., msgLen=453) at ../src/Channel/UnixStreamSocket.cpp:118
#9 0x0000555555f8655f in Channel::ConsumerSocket::UserOnUnixStreamRead (this=0x7ffe5c000bd0) at ../src/Channel/UnixStreamSocket.cpp:253
#10 0x00005555561a5086 in UnixStreamSocket::OnUvRead (this=0x7ffe5c000bd0, nread=458) at ../src/handles/UnixStreamSocket.cpp:262
#11 0x00005555561a45bb in onRead (handle=0x7ffe5c001880, nread=458, buf=0x7ffe6f5d7ed0) at ../src/handles/UnixStreamSocket.cpp:30
#12 0x0000555555f51da6 in uv__read (stream=0x7ffe5c001880) at ../deps/libuv/libuv/src/unix/stream.c:1239
#13 0x0000555555f52089 in uv__stream_io (loop=0x7ffe5c000cc0, w=0x7ffe5c001908, events=1) at ../deps/libuv/libuv/src/unix/stream.c:1306
#14 0x0000555555f588d7 in uv__io_poll (loop=0x7ffe5c000cc0, timeout=-1) at ../deps/libuv/libuv/src/unix/linux-core.c:462
#15 0x0000555555f47eb7 in uv_run (loop=0x7ffe5c000cc0, mode=UV_RUN_DEFAULT) at ../deps/libuv/libuv/src/unix/core.c:385
#16 0x0000555555f5d4e2 in DepLibUV::RunLoop () at ../src/DepLibUV.cpp:52
#17 0x0000555555f6cc8f in Worker::Worker (this=0x7ffe6f5db320, channel=0x7ffe5c000bc0, payloadChannel=0x7ffe5c001aa0, handleSignals=false) at ../src/Worker.cpp:39
#18 0x0000555555ef98ae in run (argc=4, argv=0x7ffe5c000c60, version=0x7fff44001570 "0.0.0", consumerChannelFd=242, producerChannelFd=245, payloadConsumeChannelFd=246, payloadProduceChannelFd=249) at ../src/lib.cpp:134
#19 0x0000555555ef3297 in mediasoup_sys::run (args=..., consumer_channel_fd=242, producer_channel_fd=245, payload_consumer_channel_fd=246, payload_producer_channel_fd=249) at /web/github/mediasoup/worker/src/lib.rs:47
#20 0x0000555555ec194b in mediasoup::worker::utils::spawn_with_worker_channels::{{closure}} () at /web/github/mediasoup/rust/src/worker/utils.rs:34
#21 0x0000555555ecc6c2 in std::sys_common::backtrace::__rust_begin_short_backtrace (f=...) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:125
#22 0x0000555555e78601 in std::thread::Builder::spawn_unchecked::{{closure}}::{{closure}} () at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:474
#23 0x0000555555ecc021 in <std::panic::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once (self=..., _args=()) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:322
#24 0x0000555555edd179 in std::panicking::try::do_call (data=0x7ffe6f5db878 "") at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:379
#25 0x0000555555edd32d in __rust_try ()
#26 0x0000555555edd0a4 in std::panicking::try (f=...) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:343
#27 0x0000555555ecc093 in std::panic::catch_unwind (f=...) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:396
#28 0x0000555555e7809c in std::thread::Builder::spawn_unchecked::{{closure}} () at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:473
#29 0x0000555555defe7f in core::ops::function::FnOnce::call_once{{vtable-shim}} () at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:227
#30 0x00005555566b5b1a in <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once () at /rustc/cb75ad5db02783e8b0222fee363c5f63f7e2cf5b/library/alloc/src/boxed.rs:1328
#31 <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once () at /rustc/cb75ad5db02783e8b0222fee363c5f63f7e2cf5b/library/alloc/src/boxed.rs:1328
#32 std::sys::unix::thread::Thread::new::thread_start () at /rustc/cb75ad5db02783e8b0222fee363c5f63f7e2cf5b//library/std/src/sys/unix/thread.rs:71
#33 0x00007ffff7f5f590 in start_thread (arg=0x7ffe6f5fc640) at pthread_create.c:463
#34 0x00007ffff7d30223 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
or:
Summary
Thread 47 "pipe_transport:" received signal SIGABRT, Aborted.
[Switching to Thread 0x7fff659f6640 (LWP 2864402)]
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:49
49 ../sysdeps/unix/sysv/linux/raise.c: Немає такого файла або каталогу.
(gdb)
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:49
#1 0x00007ffff7c3d864 in __GI_abort () at abort.c:79
#2 0x00007ffff7c3d749 in __assert_fail_base (fmt=0x7ffff7dc9458 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5555566f5876 "n <= stream->write_queue_size", file=0x5555566f56a0 "../deps/libuv/libuv/src/unix/stream.c", line=743, function=<optimized out>) at assert.c:92
#3 0x00007ffff7c4fa96 in __GI___assert_fail (assertion=0x5555566f5876 "n <= stream->write_queue_size", file=0x5555566f56a0 "../deps/libuv/libuv/src/unix/stream.c", line=743, function=0x5555566f5be0 <__PRETTY_FUNCTION__.11> "uv__write_req_update") at assert.c:101
#4 0x0000555555f4d0cb in uv__write_req_update (stream=0x7ffedc001990, req=0x7fff667a6470, n=118) at ../deps/libuv/libuv/src/unix/stream.c:743
#5 0x0000555555f4d67b in uv__write (stream=0x7ffedc001990) at ../deps/libuv/libuv/src/unix/stream.c:900
#6 0x0000555555f4ec6b in uv_write2 (req=0x7fff659a1470, stream=0x7ffedc001990, bufs=0x7fff659a1590, nbufs=1, send_handle=0x0, cb=0x555555f4ed17 <uv_try_write_cb>) at ../deps/libuv/libuv/src/unix/stream.c:1472
#7 0x0000555555f4ed15 in uv_write (req=0x7fff659a1470, handle=0x7ffedc001990, bufs=0x7fff659a1590, nbufs=1, cb=0x555555f4ed17 <uv_try_write_cb>) at ../deps/libuv/libuv/src/unix/stream.c:1497
#8 0x0000555555f4edd5 in uv_try_write (stream=0x7ffedc001990, bufs=0x7fff659a1590, nbufs=1) at ../deps/libuv/libuv/src/unix/stream.c:1522
#9 0x00005555561a13c7 in UnixStreamSocket::Write (this=0x7ffedc000c18, data=0x7ffef45f4010 "40:{\"event\":\"running\",\"targetId\":\"2864355\"}, | throwing MediaSoupError: usrsctp_bind() failed: Address already in use,", len=118) at ../src/handles/UnixStreamSocket.cpp:178
#10 0x0000555555f85073 in Channel::UnixStreamSocket::SendImpl (this=0x7ffedc000bc0, nsPayload=0x555556cc4160 <Logger::buffer>, nsPayloadLen=113) at ../src/Channel/UnixStreamSocket.cpp:105
#11 0x0000555555f823dd in Channel::UnixStreamSocket::SendLog (this=0x7ffedc000bc0, message=0x555556cc4160 <Logger::buffer> "ERTC::SctpAssociation::SctpAssociation() | throwing MediaSoupError: usrsctp_bind() failed: Address already in use", messageLen=113) at ../src/Channel/UnixStreamSocket.cpp:79
#12 0x0000555555fd0a4b in RTC::SctpAssociation::SctpAssociation (this=0x7fff38007dd0, listener=0x7fff3800bfd0, os=1024, mis=1024, maxSctpMessageSize=262144, sctpSendBufferSize=262144, isDataChannel=true) at ../src/RTC/SctpAssociation.cpp:258
#13 0x0000555555fd54e5 in RTC::Transport::Transport (this=0x7fff3800bfb0, id=..., listener=0x7fff3800aab0, data=...) at ../src/RTC/Transport.cpp:172
#14 0x0000555555ff9f63 in RTC::WebRtcTransport::WebRtcTransport (this=0x7fff3800bfb0, id=..., listener=0x7fff3800aab0, data=...) at ../src/RTC/WebRtcTransport.cpp:32
#15 0x0000555555fac10c in RTC::Router::HandleRequest (this=0x7fff3800aab0, request=0x7fff38009db0) at ../src/RTC/Router.cpp:188
#16 0x0000555555f6af81 in Worker::OnChannelRequest (this=0x7fff659a5320, request=0x7fff38009db0) at ../src/Worker.cpp:285
#17 0x0000555555f824da in Channel::UnixStreamSocket::OnConsumerSocketMessage (this=0x7fff38000bc0,
msg=0x7ffeea1f90ea "{\"id\":2,\"method\":\"router.createWebRtcTransport\",\"data\":{\"enableSctp\":true,\"enableTcp\":false,\"enableUdp\":true,\"initialAvailableOutgoingBitrate\":600000,\"isDataChannel\":true,\"listenIps\":[{\"ip\":\"127.0.0.1"..., msgLen=453) at ../src/Channel/UnixStreamSocket.cpp:121
#18 0x0000555555f82bb9 in Channel::ConsumerSocket::UserOnUnixStreamRead (this=0x7fff38000bd0) at ../src/Channel/UnixStreamSocket.cpp:256
#19 0x00005555561a1778 in UnixStreamSocket::OnUvRead (this=0x7fff38000bd0, nread=458) at ../src/handles/UnixStreamSocket.cpp:262
#20 0x00005555561a0cad in onRead (handle=0x7fff38001480, nread=458, buf=0x7fff659a1ed0) at ../src/handles/UnixStreamSocket.cpp:30
#21 0x0000555555f4e3d6 in uv__read (stream=0x7fff38001480) at ../deps/libuv/libuv/src/unix/stream.c:1239
#22 0x0000555555f4e6b9 in uv__stream_io (loop=0x7fff38000cc0, w=0x7fff38001508, events=1) at ../deps/libuv/libuv/src/unix/stream.c:1306
#23 0x0000555555f54f07 in uv__io_poll (loop=0x7fff38000cc0, timeout=-1) at ../deps/libuv/libuv/src/unix/linux-core.c:462
#24 0x0000555555f444e7 in uv_run (loop=0x7fff38000cc0, mode=UV_RUN_DEFAULT) at ../deps/libuv/libuv/src/unix/core.c:385
#25 0x0000555555f59b12 in DepLibUV::RunLoop () at ../src/DepLibUV.cpp:52
#26 0x0000555555f692bf in Worker::Worker (this=0x7fff659a5320, channel=0x7fff38000bc0, payloadChannel=0x7fff380016a0, handleSignals=false) at ../src/Worker.cpp:39
#27 0x0000555555ef5ede in run (argc=4, argv=0x7fff38000c60, version=0x7fffc4001500 "0.0.0", consumerChannelFd=83, producerChannelFd=86, payloadConsumeChannelFd=87, payloadProduceChannelFd=90) at ../src/lib.cpp:134
#28 0x0000555555eef8c7 in mediasoup_sys::run (args=..., consumer_channel_fd=83, producer_channel_fd=86, payload_consumer_channel_fd=87, payload_producer_channel_fd=90) at /web/github/mediasoup/worker/src/lib.rs:47
#29 0x0000555555b298eb in mediasoup::worker::utils::spawn_with_worker_channels::{{closure}} () at /web/github/mediasoup/rust/src/worker/utils.rs:34
#30 0x0000555555edb4e2 in std::sys_common::backtrace::__rust_begin_short_backtrace (f=...) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:125
#31 0x0000555555ad7d11 in std::thread::Builder::spawn_unchecked::{{closure}}::{{closure}} () at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:474
#32 0x0000555555de9e61 in <std::panic::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once (self=..., _args=()) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:322
#33 0x0000555555eea009 in std::panicking::try::do_call (data=0x7fff659a5878 "\220\024") at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:379
#34 0x0000555555eea1bd in __rust_try ()
#35 0x0000555555ee9e14 in std::panicking::try (f=...) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:343
#36 0x0000555555de9ed3 in std::panic::catch_unwind (f=...) at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:396
#37 0x0000555555ad77ac in std::thread::Builder::spawn_unchecked::{{closure}} () at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/thread/mod.rs:473
#38 0x0000555555ae53bf in core::ops::function::FnOnce::call_once{{vtable-shim}} () at /home/nazar-pc/.rustup/toolchains/stable-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:227
#39 0x00005555566ad36a in <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once () at /rustc/cb75ad5db02783e8b0222fee363c5f63f7e2cf5b/library/alloc/src/boxed.rs:1328
#40 <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once () at /rustc/cb75ad5db02783e8b0222fee363c5f63f7e2cf5b/library/alloc/src/boxed.rs:1328
#41 std::sys::unix::thread::Thread::new::thread_start () at /rustc/cb75ad5db02783e8b0222fee363c5f63f7e2cf5b//library/std/src/sys/unix/thread.rs:71
#42 0x00007ffff7f5f590 in start_thread (arg=0x7fff659f6640) at pthread_create.c:463
#43 0x00007ffff7d30223 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
At this point I feel it would be more productive to refactor the worker from using global state to dependency injection (DI) or something similar, but that is not something I really want to tackle, since my skills are lacking in this area.
Here is the commit to use in case you want to reproduce the above crashes (it can get stuck; if it does, just restart it, since for now we are only interested in the crashes). Run it with `cargo test` from the root of the repo; it should build everything automatically, including the C++ worker.
I have attempted to refactor the global libuv event loop handle in "Refactor things to make DepLibUV non-static · nazar-pc/mediasoup@f468308 · GitHub", which worked, but I later got stuck with usrsctp, which also likes static functions as callbacks, and there are even more places that need to be changed.