Stream: compiler development

Topic: setjmp / longjmp & the dev backend


view this post on Zulip Folkert de Vries (Jul 22 2023 at 16:11):

it looks like setjmp/longjmp is very simple actually

#include <stdlib.h>
#include <stdio.h>
#include <setjmp.h>

// Minimal setjmp/longjmp demo: prints the value produced by setjmp on each
// pass through it, then exits once that value is nonzero.
int main() {
  jmp_buf env;  // buffer that setjmp fills in and longjmp later reads back
  int i;

  // Yields 0 on the direct call; after the longjmp below, control resumes here
  // and the call yields the value that was handed to longjmp.
  // NOTE: assigning setjmp's result is outside the contexts the C standard
  // permits for a setjmp invocation, even though common toolchains accept it.
  i = setjmp(env);
  printf("i = %d\n", i);

  // A nonzero i means we arrived via longjmp, so stop instead of jumping again.
  if (i != 0) exit(0);

  // Resume execution at the setjmp call above, which now yields 2.
  longjmp(env, 2);
}

then we get the following (relevant) assembly

0000000000201e20 <main>:
  201e20:   55                      push   rbp
  201e21:   48 89 e5                mov    rbp,rsp
  201e24:   53                      push   rbx
  201e25:   48 81 ec c8 00 00 00    sub    rsp,0xc8
  201e2c:   48 8d bd 30 ff ff ff    lea    rdi,[rbp-0xd0]
  201e33:   e8 48 05 00 00          call   202380 <__setjmp>
  201e38:   89 c3                   mov    ebx,eax
  201e3a:   48 8d 3d 8f e3 ff ff    lea    rdi,[rip+0xffffffffffffe38f]        # 2001d0 <xdigits-0xb0>
  201e41:   31 c0                   xor    eax,eax
  201e43:   89 de                   mov    esi,ebx
  201e45:   e8 61 05 00 00          call   2023ab <printf>
  201e4a:   85 db                   test   ebx,ebx
  201e4c:   75 16                   jne    201e64 <main+0x44>
  201e4e:   48 8d bd 30 ff ff ff    lea    rdi,[rbp-0xd0]
  201e55:   be 02 00 00 00          mov    esi,0x2
  201e5a:   e8 f9 04 00 00          call   202358 <_longjmp>
  201e5f:   67 0f b9 40 01          ud1    eax,DWORD PTR [eax+0x1]
  201e64:   31 ff                   xor    edi,edi
  201e66:   e8 be 04 00 00          call   202329 <exit>
  201e6b:   67 0f b9 40 01          ud1    eax,DWORD PTR [eax+0x1]

0000000000202380 <__setjmp>:
  # note: the stack pointer is not updated here. most functions would do something like
  # 2023ab:   48 81 ec d8 00 00 00    sub    rsp,0xd8

  # save relevant registers. Apparently r8 ..= r11 are volatile and don't need to be stored?
  202380:   48 89 1f                mov    QWORD PTR [rdi],rbx # rdi holds the env argument; save rbx at env+0x0
  202383:   48 89 6f 08             mov    QWORD PTR [rdi+0x8],rbp # base pointer
  202387:   4c 89 67 10             mov    QWORD PTR [rdi+0x10],r12
  20238b:   4c 89 6f 18             mov    QWORD PTR [rdi+0x18],r13
  20238f:   4c 89 77 20             mov    QWORD PTR [rdi+0x20],r14
  202393:   4c 89 7f 28             mov    QWORD PTR [rdi+0x28],r15
  # the stack pointer after the jump. I'm not totally sure yet why the +0x8 is there
  # (or really why +8, and not +0x10 or anything else).
  202397:   48 8d 54 24 08          lea    rdx,[rsp+0x8]
  20239c:   48 89 57 30             mov    QWORD PTR [rdi+0x30],rdx
  # the actual jump address 
  2023a0:   48 8b 14 24             mov    rdx,QWORD PTR [rsp]
  2023a4:   48 89 57 38             mov    QWORD PTR [rdi+0x38],rdx
  # set the return value (rax register) to 0
  2023a8:   31 c0                   xor    eax,eax
  2023aa:   c3                      ret

0000000000202358 <_longjmp>:
  # again no stack pointer update

  # I believe what this achieves is to always store something nonzero
  # so that the setjmp call "returns" nonzero when it is jumped to
  202358:   31 c0                   xor    eax,eax
  20235a:   83 fe 01                cmp    esi,0x1
  20235d:   11 f0                   adc    eax,esi
  # rdi stores the env. here we read back all of the registers
  20235f:   48 8b 1f                mov    rbx,QWORD PTR [rdi]
  202362:   48 8b 6f 08             mov    rbp,QWORD PTR [rdi+0x8]
  202366:   4c 8b 67 10             mov    r12,QWORD PTR [rdi+0x10]
  20236a:   4c 8b 6f 18             mov    r13,QWORD PTR [rdi+0x18]
  20236e:   4c 8b 77 20             mov    r14,QWORD PTR [rdi+0x20]
  202372:   4c 8b 7f 28             mov    r15,QWORD PTR [rdi+0x28]
  202376:   48 8b 67 30             mov    rsp,QWORD PTR [rdi+0x30]
  # jump to right after the original `setjmp` call.
  20237a:   ff 67 38                jmp    QWORD PTR [rdi+0x38]

so, we just need a way to put that jmp_buf env in global memory, and then a way to retrieve that value when a longjmp is needed (i.e. in roc_panic)

view this post on Zulip Richard Feldman (Jul 22 2023 at 16:23):

nice! :grinning:

view this post on Zulip Richard Feldman (Jul 22 2023 at 16:24):

that would unblock using the dev backend in roc repl and roc test, right?

view this post on Zulip Folkert de Vries (Jul 22 2023 at 16:24):

it's a big step for sure

view this post on Zulip Folkert de Vries (Jul 22 2023 at 16:24):

we still need map2 and some other builtins too

view this post on Zulip Qqwy / Marten (Jul 22 2023 at 17:28):

What is the plan for using setjmp/longjmp? Recover from a crash at the boundary with a host? :happy:

view this post on Zulip Folkert de Vries (Jul 22 2023 at 18:01):

basically

view this post on Zulip Folkert de Vries (Jul 22 2023 at 18:02):

for the repl and tests, for the dev backend we need something to return control to the host when a roc_panic happens

view this post on Zulip Qqwy / Marten (Jul 22 2023 at 21:54):

By the way

  i = setjmp(env);

Though widely supported, this is undefined behaviour.
Officially you're only allowed to immediately branch based on the return value (or fully ignore it), but not to store it anywhere. (source) So if there is any data we want to keep between the longjmp and the setjmp we'd need to set aside one more global variable for it.

Not sure whether we care, though :angel: . I am not aware of any C targets where this does not work in practice, even though strictly speaking it is UB.

view this post on Zulip Folkert de Vries (Jul 23 2023 at 12:25):

ah, yes. I knew this, but just copied an example from the internet (it was from a university course, too). But luckily, there is no UB in assembly

view this post on Zulip Qqwy / Marten (Jul 23 2023 at 12:30):

But luckily, there is no UB in assembly

:100: !!

view this post on Zulip Luke Boswell (Jul 26 2023 at 06:41):

I've been working through the errors to try and get zig builtins tests working on Windows and I've hit a bit of a snag I think with setjmp/longjmp. It's not entirely related to this thread, but posting here as it may be related.

Basically I'm currently getting the following error and not sure how to resolve this.

lld-link: error: undefined symbol: setjmp
>>> referenced by C:\Users\bosyl\Documents\GitHub\roc\crates\compiler\builtins\bitcode\src\main.zig:242
>>>               src\zig-cache\o\e65f97141b821227497b14c04e9fd551\test.obj:(__roc_force_setjmp)

lld-link: error: undefined symbol: longjmp
>>> referenced by C:\Users\bosyl\Documents\GitHub\roc\crates\compiler\builtins\bitcode\src\main.zig:245
>>>               src\zig-cache\o\e65f97141b821227497b14c04e9fd551\test.obj:(__roc_force_longjmp)
error: LLDReportedFailure

The related code is in crates\compiler\builtins\bitcode\src\main.zig I think with

// Utils continued - SJLJ
// For tests (in particular test_gen), roc_panic is implemented in terms of
// setjmp/longjmp. LLVM is unable to generate code for longjmp on AArch64 (https://github.com/roc-lang/roc/issues/2965),
// so instead we ask Zig to please provide implementations for us, which it does
// (seemingly via musl).
//
// These are declarations only; the symbols themselves have to be supplied at link time.
// NOTE(review): the `[*c]c_int` parameter presumably stands in for C's `jmp_buf` — confirm.
pub extern fn setjmp([*c]c_int) c_int;
pub extern fn longjmp([*c]c_int, c_int) noreturn;
pub extern fn _setjmp([*c]c_int) c_int;
pub extern fn _longjmp([*c]c_int, c_int) noreturn;
pub extern fn sigsetjmp([*c]c_int, c_int) c_int;
pub extern fn siglongjmp([*c]c_int, c_int) noreturn;
pub extern fn longjmperror() void;

// Zig won't expose the externs (and hence link correctly) unless we force them to be used.
// This C-calling-convention wrapper exists only to reference the `setjmp` extern:
// it forwards `it` unchanged and returns whatever `setjmp` returns.
fn __roc_force_setjmp(it: [*c]c_int) callconv(.C) c_int {
    return setjmp(it);
}
// C-calling-convention wrapper that exists only to reference the `longjmp` extern:
// it forwards both arguments unchanged and, like `longjmp`, never returns.
fn __roc_force_longjmp(a0: [*c]c_int, a1: c_int) callconv(.C) noreturn {
    longjmp(a0, a1);
}

My question, how do I ensure zig links in something with these on Windows?

view this post on Zulip Luke Boswell (Jul 26 2023 at 07:12):

Figured a workaround that seems to work, all the tests pass now :tada:

// Placeholder with `setjmp`'s signature: it saves nothing into `it` and always
// returns 0, the value a direct (non-longjmp) return from setjmp produces.
fn roc_setjmp_windows_stub(it: [*c]c_int) callconv(.C) c_int {
    _ = it; // explicitly discard the unused parameter
    return 0;
}

// Placeholder with `longjmp`'s signature: it does not jump anywhere. Both
// arguments are ignored and the process is terminated with exit code 1.
fn roc_longjmp_windows_stub(a0: [*c]c_int, a1: c_int) callconv(.C) noreturn {
    _ = a0; // explicitly discard the unused parameters
    _ = a1;
    std.os.exit(1);
}

// When compiling for Windows, export the two stub functions under the C symbol
// names `longjmp` and `setjmp` with strong linkage, so that references to those
// names have a definition to link against. Other targets export nothing here.
comptime {
    if (builtin.os.tag == .windows) {
        @export(roc_longjmp_windows_stub, .{ .name = "longjmp", .linkage = .Strong });
        @export(roc_setjmp_windows_stub, .{ .name = "setjmp", .linkage = .Strong });
    }
}

view this post on Zulip Folkert de Vries (Jul 26 2023 at 21:17):

I'm getting close with this https://github.com/roc-lang/roc/pull/5699

view this post on Zulip Folkert de Vries (Jul 26 2023 at 21:17):

on x86_64 linux, anyway

view this post on Zulip Luke Boswell (Jul 27 2023 at 00:58):

@Folkert de Vries is your work related to Windows at all? I'm just wondering if there is anything I can/should do to assist here.

view this post on Zulip Folkert de Vries (Jul 27 2023 at 07:15):

eventually, maybe. but I suspect there are other dev backend tests that fail too right now

view this post on Zulip Folkert de Vries (Jul 27 2023 at 07:16):

maybe try cargo nextest-gen-dev --no-fail-fast and see what happens on main today


Last updated: Jul 06 2025 at 12:14 UTC