Io.Threaded: checkCancel crashes with "unreachable" when cancel_status is .acknowledged #30601

Open
opened 2025-12-27 15:25:19 +01:00 by jedisct1 · 1 comment
Member

Zig Version

0.16.0-dev.1658+c3f2de5e5

Steps to Reproduce and Observed Behavior

When connecting to a host whose DNS lookup returns many IP addresses (e.g., HuggingFace returns 12 IPs), connections are attempted in parallel; after one succeeds, the others are canceled, which currently seems to trigger a race condition and hit unreachable code.

const std = @import("std");

/// Minimal reproduction: issues a single HTTPS GET request to huggingface.co
/// through `std.http.Client` backed by `std.Io.Threaded`, then prints the
/// response status.
pub fn main() !void {
    // Debug allocator backing both the Io implementation and the HTTP client;
    // the result of `deinit` is explicitly discarded.
    var debug_allocator: std.heap.DebugAllocator(.{}) = .{};
    defer _ = debug_allocator.deinit();
    const gpa = debug_allocator.allocator();

    // Threaded Io implementation; the panic in the reported trace is raised
    // from `checkCancel` in std/Io/Threaded.zig.
    var io_instance = std.Io.Threaded.init(gpa, .{});
    defer io_instance.deinit();
    const io = io_instance.io();

    var http_client: std.http.Client = .{ .allocator = gpa, .io = io };
    defer http_client.deinit();

    // HuggingFace returns 12 IP addresses (8 IPv6 + 4 IPv4)
    // The parallel connection attempts appear to trigger the panic in checkCancel()
    // (reported as a race condition; root cause not confirmed).
    const uri = try std.Uri.parse("https://huggingface.co");
    var req = try http_client.request(.GET, uri, .{});
    defer req.deinit();
    // GET carries no request body.
    try req.sendBodiless();
    // An empty slice is passed as the argument to `receiveHead`.
    var response = try req.receiveHead(&.{});
    std.debug.print("Status: {}\n", .{response.head.status});
}
thread 4259980 panic: reached unreachable code
/Users/j/src/zig/lib/std/Io/Threaded.zig:118:30: 0x1044dc233 in checkCancel (repro_threaded_bug)
            .acknowledged => unreachable,
                             ^
/Users/j/src/zig/lib/std/Io/Threaded.zig:4683:51: 0x1045961ab in openSocketPosix (repro_threaded_bug)
                    try current_thread.checkCancel();
                                                  ^
/Users/j/src/zig/lib/std/Io/Threaded.zig:4403:42: 0x104587cef in netConnectIpPosix (repro_threaded_bug)
    const socket_fd = try openSocketPosix(current_thread, family, .{
                                         ^
/Users/j/src/zig/lib/std/Io/net.zig:323:38: 0x1047ce53f in connect (repro_threaded_bug)
        return io.vtable.netConnectIp(io.userdata, &address, options);
                                     ^
/Users/j/src/zig/lib/std/Io/net/HostName.zig:313:35: 0x1047cbfdb in enqueueConnectionFallible (repro_threaded_bug)
    const result = address.connect(io, options);
                                  ^
/Users/j/src/zig/lib/std/Io/net/HostName.zig:303:30: 0x1047b7017 in enqueueConnection (repro_threaded_bug)
    enqueueConnectionFallible(address, io, queue, options) catch |err| switch (err) {
                             ^
/Users/j/src/zig/lib/std/Io.zig:1051:17: 0x1047972bb in start (repro_threaded_bug)
                @call(.auto, function, args_casted.*);
                ^
/Users/j/src/zig/lib/std/Io/Threaded.zig:1101:21: 0x1044cc1c3 in groupAsync (repro_threaded_bug)
        return start(group, context.ptr);
                    ^
/Users/j/src/zig/lib/std/Io.zig:1054:29: 0x10475f3f3 in async__anon_348569 (repro_threaded_bug)
        io.vtable.groupAsync(io.userdata, g, @ptrCast(&args), .of(Args), TypeErased.start);
                            ^
/Users/j/src/zig/lib/std/Io/net/HostName.zig:287:42: 0x10473187b in connectMany (repro_threaded_bug)
        .address => |address| group.async(io, enqueueConnection, .{ address, io, results, options }),
                                         ^
/Users/j/src/zig/lib/std/Io.zig:2043:13: 0x1046da79f in start (repro_threaded_bug)
            result_casted.* = @call(.auto, function, args_casted.*);
            ^
/Users/j/src/zig/lib/std/Io/Threaded.zig:836:16: 0x1045019f7 in start (repro_threaded_bug)
        ac.func(ac.contextPointer(), ac.resultPointer());
               ^
/Users/j/src/zig/lib/std/Io/Threaded.zig:623:26: 0x1045414a7 in worker (repro_threaded_bug)
            closure.start(closure, t);
                         ^
/Users/j/src/zig/lib/std/Thread.zig:558:13: 0x104523e6f in callFn__anon_15676 (repro_threaded_bug)
            @call(.auto, f, args);
            ^
/Users/j/src/zig/lib/std/Thread.zig:829:30: 0x104502587 in entryFn (repro_threaded_bug)
                return callFn(f, args_ptr.*);

Expected Behavior

The program should complete without reaching `unreachable` code (no panic).

### Zig Version 0.16.0-dev.1658+c3f2de5e5 ### Steps to Reproduce and Observed Behavior When connecting to a host that returns many IP addresses from DNS (e.g., HuggingFace returns 12 IPs) and after one succeeds, it cancels the others but that currently seems to trigger a race condition and hits unreachable code. ```zig const std = @import("std"); pub fn main() !void { var debug_allocator: std.heap.DebugAllocator(.{}) = .{}; defer _ = debug_allocator.deinit(); const gpa = debug_allocator.allocator(); var io_instance = std.Io.Threaded.init(gpa, .{}); defer io_instance.deinit(); const io = io_instance.io(); var http_client: std.http.Client = .{ .allocator = gpa, .io = io }; defer http_client.deinit(); // HuggingFace returns 12 IP addresses (8 IPv6 + 4 IPv4) // The parallel connection attempts trigger a race condition in checkCancel() const uri = try std.Uri.parse("https://huggingface.co"); var req = try http_client.request(.GET, uri, .{}); defer req.deinit(); try req.sendBodiless(); var response = try req.receiveHead(&.{}); std.debug.print("Status: {}\n", .{response.head.status}); } ``` ```text thread 4259980 panic: reached unreachable code /Users/j/src/zig/lib/std/Io/Threaded.zig:118:30: 0x1044dc233 in checkCancel (repro_threaded_bug) .acknowledged => unreachable, ^ /Users/j/src/zig/lib/std/Io/Threaded.zig:4683:51: 0x1045961ab in openSocketPosix (repro_threaded_bug) try current_thread.checkCancel(); ^ /Users/j/src/zig/lib/std/Io/Threaded.zig:4403:42: 0x104587cef in netConnectIpPosix (repro_threaded_bug) const socket_fd = try openSocketPosix(current_thread, family, .{ ^ /Users/j/src/zig/lib/std/Io/net.zig:323:38: 0x1047ce53f in connect (repro_threaded_bug) return io.vtable.netConnectIp(io.userdata, &address, options); ^ /Users/j/src/zig/lib/std/Io/net/HostName.zig:313:35: 0x1047cbfdb in enqueueConnectionFallible (repro_threaded_bug) const result = address.connect(io, options); ^ /Users/j/src/zig/lib/std/Io/net/HostName.zig:303:30: 0x1047b7017 in enqueueConnection 
(repro_threaded_bug) enqueueConnectionFallible(address, io, queue, options) catch |err| switch (err) { ^ /Users/j/src/zig/lib/std/Io.zig:1051:17: 0x1047972bb in start (repro_threaded_bug) @call(.auto, function, args_casted.*); ^ /Users/j/src/zig/lib/std/Io/Threaded.zig:1101:21: 0x1044cc1c3 in groupAsync (repro_threaded_bug) return start(group, context.ptr); ^ /Users/j/src/zig/lib/std/Io.zig:1054:29: 0x10475f3f3 in async__anon_348569 (repro_threaded_bug) io.vtable.groupAsync(io.userdata, g, @ptrCast(&args), .of(Args), TypeErased.start); ^ /Users/j/src/zig/lib/std/Io/net/HostName.zig:287:42: 0x10473187b in connectMany (repro_threaded_bug) .address => |address| group.async(io, enqueueConnection, .{ address, io, results, options }), ^ /Users/j/src/zig/lib/std/Io.zig:2043:13: 0x1046da79f in start (repro_threaded_bug) result_casted.* = @call(.auto, function, args_casted.*); ^ /Users/j/src/zig/lib/std/Io/Threaded.zig:836:16: 0x1045019f7 in start (repro_threaded_bug) ac.func(ac.contextPointer(), ac.resultPointer()); ^ /Users/j/src/zig/lib/std/Io/Threaded.zig:623:26: 0x1045414a7 in worker (repro_threaded_bug) closure.start(closure, t); ^ /Users/j/src/zig/lib/std/Thread.zig:558:13: 0x104523e6f in callFn__anon_15676 (repro_threaded_bug) @call(.auto, f, args); ^ /Users/j/src/zig/lib/std/Thread.zig:829:30: 0x104502587 in entryFn (repro_threaded_bug) return callFn(f, args_ptr.*); ``` ### Expected Behavior no unreachable
Owner

@mlugg I think it would be good to evaluate this bug report with respect to your in progress branch that reworks cancel status.

It does not trivially repro on my system as of 2e73288e63.

I doubt this is a race condition; it's more likely a cancelation being incorrectly swallowed somewhere inside the std.Io.Threaded implementation. It's not supposed to acknowledge twice. Or perhaps it is a constraint that was not properly reevaluated when we introduced recancel and cancel protection.

@mlugg I think it would be good to evaluate this bug report with respect to your in progress branch that reworks cancel status. It does not trivially repro on my system as of 2e73288e6302cc405b8fbb0c4c0667d089775c55. I doubt this is a race condition; it's more likely a cancelation being incorrectly swallowed internally by `std.Io.Threaded` implementation somewhere. It's not supposed to acknowledge twice. Or perhaps a constraint that was not properly reevaluated when we introduced recancel and cancel protection.
Sign in to join this conversation.
No labels
abi/f32
abi/ilp32
abi/n32
abi/sf
abi/x32
accepted
arch/1750a
arch/21k
arch/6502
arch/a29k
arch/aarch64
arch/alpha
arch/amdgcn
arch/arc
arch/arc32
arch/arc64
arch/arm
arch/avr
arch/avr32
arch/bfin
arch/bpf
arch/clipper
arch/colossus
arch/cr16
arch/cris
arch/csky
arch/dlx
arch/dsp16xx
arch/elxsi
arch/epiphany
arch/fr30
arch/frv
arch/h8300
arch/h8500
arch/hexagon
arch/hppa
arch/hppa64
arch/i370
arch/i860
arch/i960
arch/ia64
arch/ip2k
arch/kalimba
arch/kvx
arch/lanai
arch/lm32
arch/loongarch32
arch/loongarch64
arch/m32r
arch/m68k
arch/m88k
arch/maxq
arch/mcore
arch/metag
arch/microblaze
arch/mips
arch/mips64
arch/mmix
arch/mn10200
arch/mn10300
arch/moxie
arch/mrisc32
arch/msp430
arch/nds32
arch/nios2
arch/ns32k
arch/nvptx
arch/or1k
arch/pdp10
arch/pdp11
arch/pj
arch/powerpc
arch/powerpc64
arch/propeller
arch/riscv32
arch/riscv64
arch/rl78
arch/rx
arch/s390
arch/s390x
arch/sh
arch/sh64
arch/sparc
arch/sparc64
arch/spirv
arch/spu
arch/st200
arch/starcore
arch/tilegx
arch/tilepro
arch/tricore
arch/ts
arch/v850
arch/vax
arch/vc4
arch/ve
arch/wasm
arch/we32k
arch/x86
arch/x86_16
arch/x86_64
arch/xcore
arch/xgate
arch/xstormy16
arch/xtensa
autodoc
backend/c
backend/llvm
backend/self-hosted
binutils
breaking
build system
debug info
docs
error message
frontend
fuzzing
incremental
lib/c
lib/compiler-rt
lib/cxx
lib/std
lib/tsan
lib/ubsan-rt
lib/unwind
linking
miscompilation
os/aix
os/android
os/bridgeos
os/contiki
os/dragonfly
os/driverkit
os/emscripten
os/freebsd
os/fuchsia
os/haiku
os/hermit
os/hurd
os/illumos
os/ios
os/kfreebsd
os/linux
os/maccatalyst
os/macos
os/managarm
os/netbsd
os/ohos
os/openbsd
os/plan9
os/redox
os/rtems
os/serenity
os/solaris
os/tvos
os/uefi
os/visionos
os/wali
os/wasi
os/watchos
os/windows
os/zos
proposal
release notes
testing
tier system
zig cc
zig fmt
bounty
bug
contributor-friendly
downstream
enhancement
infra
optimization
question
regression
upstream
No milestone
No project
No assignees
2 participants
Notifications
Due date
The due date is invalid or out of range. Please use the format "yyyy-mm-dd".

No due date set.

Dependencies

No dependencies set.

Reference
ziglang/zig#30601
No description provided.