zig layout for a roc struct that contains a function · platform development

I try to have a mainForHost in a platform, that returns a struct which has a field that is a function:

Job : {
    name: List U8,
    value: List U8,
    callback: (List U8 -> List U8),
}

But I am not able to build the corresponding data structure in zig. I was able to get roc to return a function, and to get roc to return a struct without a function, but I have no idea how to handle the combination.

What I want to do in zig is to get the pointer to the function, so I can call roc__mainForHost_0_caller.

Brendan Hansknecht (Jul 01 2023 at 14:45):

That function just returns closure capture data for the callback. The callback would need to be exposed and called directly like mainForHost is called.

Brendan Hansknecht (Jul 01 2023 at 14:47):

Should be doable. Probably need an as statement in the type to get roc to expose the caller function (though not 100% sure if that still is the case)

Brendan Hansknecht (Jul 01 2023 at 14:48):

@Folkert de Vries does that sound correct with the latest glue and function related changes?

Folkert de Vries (Jul 01 2023 at 15:09):

if that is correct then yes you'd need to name the function type (so something like callback : (a -> b) as Foo) in the signature of mainForHost

Brendan Hansknecht (Jul 01 2023 at 16:01):

Yeah, glue isn't used, but I wasn't sure if the function generation strategy changed some because of glue changes.

Brendan Hansknecht (Jul 01 2023 at 16:01):

Does the job type still need to be embedded in the type of mainForHost or can the as go in the job type definition?

Folkert de Vries (Jul 01 2023 at 16:02):

Oskar Hahn (Jul 01 2023 at 19:35):

Glue is not used. As far as I know, glue does currently not support zig. If I am wrong, I would like to use glue.

I tried it with the as statement, but it did not help. I am not sure if as is needed. If I just return a function (mainForHost : List u8 -> (List U8 -> List U8)) then it works. roc creates a function roc__mainForHost_0_caller that I can call with the return value from roc__mainForHost_1_exposed and everything works fine.

Job : {
    name: List U8,
    value: List U8,
    callback: List U8 -> List U8,
}

const Job = extern struct { placeholder_I_dont_understand: u128, name: RocList, value: RocList };
const RocList = extern struct { pointer: [*]u8, length: usize, capacity: usize };

extern fn roc__mainForHost_1_exposed(job: *Job, argument: *RocList) void;
extern fn roc__mainForHost_0_caller(argument: *RocList, callback_pointer: [*]u8, result: *RocList) void;

With the following code, I get a pointer, that I can use to call roc__mainForHost_0_caller

var result: *Job = undefined;
roc__mainForHost_1_exposed(result, arg);
return @ptrCast([*]u8, result);

var result: Job = undefined;
roc__mainForHost_1_exposed(&result, arg);
return result.name.pointer;

But I do not know how to call roc__mainForHost_1_exposed so I can get the callresult pointer and the name attribute.

I would have guessed that it should be easy to go from *Job to Job with something like &result.name.pointer or result.*.name.pointer, but nothing seems to work.

Folkert de Vries (Jul 01 2023 at 19:38):

Folkert de Vries (Jul 01 2023 at 19:39):

and you can use the --debug flag in your roc build/run command to have roc generate .ll files. Those contain exactly the names/types of functions that roc generates, so that you can use them from zig

Oskar Hahn (Jul 01 2023 at 19:59):

But the solution was something different. As I looked at the values, zig behaved very strange. For example the pointer cast to an int was something like 7. And the value from 1000 + @ptrToInt(result.name.pointer) was still 7

It seems that there are some critical bugs in zig 0.9.1 with wasm that were fixed with zig 0.10.1.

Folkert de Vries (Jul 01 2023 at 20:06):

Oskar Hahn (Jul 01 2023 at 20:16):

Oskar Hahn (Jul 04 2023 at 12:39):

Could I ask for your help once more? I am still struggling with zig. I never used a language with manual memory management before. So I have some difficulties with it.

I am trying to write a function in zig, that calls roc and then returns a pointer to a zig-struct with two values. If I am correct, I have to allocate this struct on the heap. But as soon as I allocate anything, the value from roc gets corrupted.

var roc_result: u32 = undefined;
roc__mainForHost_1_exposed(roc_result, arg);
return roc_result;

const allocator = std.heap.page_allocator;

var roc_result: u32 = undefined;
roc__mainForHost_1_exposed(roc_result, arg);
_ = allocator.create(u32) catch
    @panic("failed to allocate result type");
return roc_result;

const Align = extern struct { a: usize, b: usize };
extern fn malloc(size: usize) callconv(.C) ?*align(@alignOf(Align)) anyopaque;
extern fn realloc(c_ptr: [*]align(@alignOf(Align)) u8, size: usize) callconv(.C) ?*anyopaque;
extern fn free(c_ptr: [*]align(@alignOf(Align)) u8) callconv(.C) void;
extern fn memcpy(dest: *anyopaque, src: *anyopaque, count: usize) *anyopaque;

// Allocation hook exported with the C calling convention under the name `roc_alloc`.
// Forwards the request to the extern `malloc` and returns whatever it returns
// (which may be null, hence the optional return type).
export fn roc_alloc(size: usize, alignment: u32) callconv(.C) ?*anyopaque {
    // The requested alignment is explicitly discarded; the extern `malloc` is declared
    // as returning a pointer aligned to `@alignOf(Align)`.
    _ = alignment;

    return malloc(size);
}

// Reallocation hook exported with the C calling convention under the name `roc_realloc`.
// Forwards `c_ptr` and `new_size` to the extern `realloc` and returns its result.
export fn roc_realloc(c_ptr: *anyopaque, new_size: usize, old_size: usize, alignment: u32) callconv(.C) ?*anyopaque {
    // Neither the old size nor the requested alignment is used by this implementation.
    _ = old_size;
    _ = alignment;

    // Cast the opaque pointer to the `[*]align(@alignOf(Align)) u8` type that the
    // extern `realloc` declaration expects.
    return realloc(@alignCast(@alignOf(Align), @ptrCast([*]u8, c_ptr)), new_size);
}

// Deallocation hook exported with the C calling convention under the name `roc_dealloc`.
// Hands `c_ptr` to the extern `free`.
export fn roc_dealloc(c_ptr: *anyopaque, alignment: u32) callconv(.C) void {
    // The alignment argument is not needed by `free` and is discarded.
    _ = alignment;

    // Cast the opaque pointer to the `[*]align(@alignOf(Align)) u8` type that the
    // extern `free` declaration expects.
    free(@alignCast(@alignOf(Align), @ptrCast([*]u8, c_ptr)));
}

Could it be, that the zig page_allocator and roc_alloc are writing to the same places? How could I rewrite the roc_alloc to use the page_allocator?

Folkert de Vries (Jul 04 2023 at 12:44):

var roc_result: u32 = undefined;
roc__mainForHost_1_exposed(roc_result, arg);

Folkert de Vries (Jul 04 2023 at 12:44):

you seem to give it a u32 which is almost certainly not what the LLVM code says it should be

Folkert de Vries (Jul 04 2023 at 12:45):

I expect that it wants a pointer, maybe i32* in the llvm IR? or something ending in * certainly

Folkert de Vries (Jul 04 2023 at 12:47):

so in general. the fact that the earlier program seems to work is coincidence. the zig side has to match the roc (llvm) side exactly

Oskar Hahn (Jul 04 2023 at 12:47):

define void @roc__mainForHost_1_exposed({ { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList }* sret({ { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList }) %0, { i8*, i32, i32 }* %1) {
entry:
  %result_value = alloca { { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList }, align 8
  %bitcast_arg = bitcast { i8*, i32, i32 }* %1 to %list.RocList*
  %load_arg = load %list.RocList, %list.RocList* %bitcast_arg, align 4
  call fastcc void @_mainForHost_c610e85212d0697cb161d4ba431ba63f273feee7dcb7927c9ff5d74ae6cbfa3(%list.RocList %load_arg, { { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList }* %result_value)
  %load_roc_result = load { { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList }, { { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList }* %result_value, align 4
  store { { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList } %load_roc_result, { { [0 x i32], [12 x i8], i8, [3 x i8] }, %list.RocList }* %0, align 4
  ret void
}

Folkert de Vries (Jul 04 2023 at 12:48):

assuming you're on a 64-bit machine, using u64 (or usize, to be generic) is usually the better choice

Folkert de Vries (Jul 04 2023 at 12:48):

Oskar Hahn (Jul 04 2023 at 12:49):

Folkert de Vries (Jul 04 2023 at 12:49):

Oskar Hahn (Jul 04 2023 at 12:50):

const RocJob = extern struct { placeholder_I_dont_understand: u128, value: RocList };
const RocList = extern struct { pointer: [*]u8, length: usize, capacity: usize };
// TODO: u32 works, but use a pointer, so it is more clear
extern fn roc__mainForHost_1_exposed(job: u32, argument: *RocList) void;

Folkert de Vries (Jul 04 2023 at 12:50):

Folkert de Vries (Jul 04 2023 at 12:51):

because you know exactly what types and sizes you expect, no (heap) allocation is needed

Folkert de Vries (Jul 04 2023 at 12:53):

also because this already uses the sret attribute ("stack return"), I wonder if this might just work

extern fn roc__mainForHost_1_exposed(argument: *RocList) RocJob;

Oskar Hahn (Jul 04 2023 at 12:58):

Yes. This works and looks nicer. But it still does not work if I add an allocation:

var roc_result = roc__mainForHost_1_exposed(arg);

_ = allocator.create(u32) catch
        @panic("failed to allocate result type");

return @ptrToInt(&roc_result);

Oskar Hahn (Jul 04 2023 at 12:59):

The goal is to return something like this (both are pointers, I just use u32 for the moment to make it easier to understand from the JavaScript side):

const ExternJob = extern struct { callback: u32, value: u32 };

Folkert de Vries (Jul 04 2023 at 13:02):

describe "does not work" in more detail? no allocation is happening. malloc and the zig page allocator don't interfere

Folkert de Vries (Jul 04 2023 at 13:03):

what is happening here is that you are returning a pointer that does not live long enough

Folkert de Vries (Jul 04 2023 at 13:03):

Folkert de Vries (Jul 04 2023 at 13:05):

the &roc_result takes a pointer to the roc_result value as it lives on the stack of the surrounding function. But when you return, that stack memory is free'd up. So whoever you return this pointer to gets a pointer that points to invalid memory (it has just been cleaned up)

Folkert de Vries (Jul 04 2023 at 13:05):

I don't think your surrounding function should return a pointer. would make things way simpler

Oskar Hahn (Jul 04 2023 at 13:06):

That is true. But wasm only supports basic types like u32. So a pointer is all I can return

Folkert de Vries (Jul 04 2023 at 13:07):

kind of. if it's an extern function. One possible approach (given that you know the types) is to provide the function with a pointer to write the result into

Folkert de Vries (Jul 04 2023 at 13:08):

Oskar Hahn (Jul 04 2023 at 13:10):

It's hard to tell what is going wrong. What should happen is that I return a pointer to the closure (I don't know if this is the correct word) that I have to use to later call roc__mainForHost_0_caller. So as a result, I call roc with two arguments: one to roc__mainForHost_1_exposed and one to roc__mainForHost_0_caller. The two arguments should be combined with this roc function:

main = \arg1 -> \arg2 -> "arg1: \(arg1), arg2: \(arg2)"

When I said it works, then this happened. When I said it does not work, then it was a strange behavior. For example, it returns a string where arg1 was the variable I put into arg2

Oskar Hahn (Jul 04 2023 at 13:11):

But it has one problem. I do not know what the size of the closure is :) I just returned the pointer. But if I save it somewhere, I have to know the size.

Folkert de Vries (Jul 04 2023 at 13:13):

oh. hmm. well really I think you should not do this manually if you expect the roc program to change

Folkert de Vries (Jul 04 2023 at 13:13):

Oskar Hahn (Jul 04 2023 at 13:14):

Folkert de Vries (Jul 04 2023 at 13:14):

Folkert de Vries (Jul 04 2023 at 13:15):

basically, there is no reasonable way for the zig code to know what roc will give it, and what structure it will have

Folkert de Vries (Jul 04 2023 at 13:16):

so unless you know the structure exactly and it does not change as you change the roc program, you'll run into trouble

Folkert de Vries (Jul 04 2023 at 13:16):

and here structure means, loosely, the type of main. Loosely because when you start returning closures things get complicated

Oskar Hahn (Jul 04 2023 at 13:20):

All I would need is the size. What does roc__mainForHost_1_size() or roc__mainForHost_0_size do? They sound as if they could return the size?

Folkert de Vries (Jul 04 2023 at 13:21):

yes they do. but this only works for one level. (if that is all you need, great!)

Folkert de Vries (Jul 04 2023 at 13:23):

the crates/cli_testing_examples/benchmarks/platform/host.zig file might be helpful

Folkert de Vries (Jul 04 2023 at 13:24):

// Host entry point (excerpt; the remainder of the body is elided below).
// Allocates an output buffer sized by `roc__mainForHost_1_exposed_size()`, passes it to
// `roc__mainForHost_1_exposed_generic`, and then treats that buffer as the closure data.
pub fn main() !u8 {
    const stderr = std.io.getStdErr().writer();

    // The size might be zero; if so, make it at least 8 so that we don't have a nullptr
    const size = std.math.max(@intCast(usize, roc__mainForHost_1_exposed_size()), 8);
    // `.?` unwraps the optional returned by roc_alloc; a null result is not handled here.
    const raw_output = roc_alloc(@intCast(usize, size), @alignOf(u64)).?;
    // Byte-pointer view of the same allocation.
    var output = @ptrCast([*]u8, raw_output);

    // Release the buffer when this function exits.
    defer {
        roc_dealloc(raw_output, @alignOf(u64));
    }

    var timer = std.time.Timer.start() catch unreachable;

    // Per the surrounding discussion, this call writes roc's return value into `output`.
    roc__mainForHost_1_exposed_generic(output);

    // The bytes written into the buffer are used as the closure's data from here on.
    const closure_data_pointer = @ptrCast([*]u8, output);
    // ...
}

Folkert de Vries (Jul 04 2023 at 13:24):

in your case remove the defer block, because it would clean up the memory, but you want to return it

Oskar Hahn (Jul 04 2023 at 13:25):

My internet connection will break in some minutes. So I say thank you for now and will tell you when I tested it

Oskar Hahn (Jul 04 2023 at 14:34):

mainForHost : Task {} [] as Fx

mainForHost : List U8 -> Task {} [] as Fx

In this case, the closure would (or at least could) contain a variable-sized argument. So the size of the closure could only be known at runtime. But as far as I can see, all the *_size() functions return a value that is independent of the size of the input argument. I tested it with all of these functions:

extern fn roc__mainForHost_1_exposed_size() i64;
extern fn roc__mainForHost_0_size() i64;
extern fn roc__mainForHost_0_result_size() i64;
extern fn roc__mainForHost_1_size() i64;
extern fn roc__mainForHost_1_result_size() i64;

const RocList = extern struct { pointer: [*]u8, length: usize, capacity: usize };

So it not only returns a pointer to the data, but also the length of the data. If it would only return the pointer, then it would not be possible to do anything with it.

So I think, it should be the same for a closure. A closure should also return its size.

Is this already the case? If yes: how can I access it? If not, do you also see that it would be helpful?

Folkert de Vries (Jul 04 2023 at 14:41):

Folkert de Vries (Jul 04 2023 at 14:43):

in this case, we statically know (when we compile the app) exactly how big that closure is

Folkert de Vries (Jul 04 2023 at 14:43):

Oskar Hahn (Jul 04 2023 at 15:02):

How is that possible? I would have thought, that the closure has to contain all the data that the main function was called with. If you call main with 1GB of data, I would think, that the closure is 1GB +X.

Folkert de Vries (Jul 04 2023 at 15:07):

Folkert de Vries (Jul 04 2023 at 15:08):

because if so, all we store in the closure is 3 usize values: ptr, len, capacity. the actual contents of the list are elsewhere in memory and kept alive (as in, not cleaned up) because the closure contains them, but it does not count towards the data that is returned from main

Oskar Hahn (Jul 04 2023 at 20:19):

Ahh this makes sense. But I still don't get it 100%. The argument list can be saved elsewhere. But what about data that is allocated from roc? For example the following function

main : List Str -> ( U32 -> List Str )
main = \list ->
    new_list = List.map list (\s -> "hello (\s)")
    \i ->
        List.map new_list (\s -> Str.repeat s i)

This would return a closure that has to contain new_list. But since it has a variable size, it has to be stored elsewhere. So when will the actual content of new_list be freed? roc can not know, if the platform saves the closure (with the pointer to new_list) to call it later.

Brendan Hansknecht (Jul 04 2023 at 21:08):

Otherwise, if new_list were passed back into roc without the refcount incremented, roc would free it.

Brendan Hansknecht (Jul 04 2023 at 21:09):

If new_list is passed to the platform with a unique refcount, the platform is responsible for freeing it assuming that the list isn't passed back into roc.

Brendan Hansknecht (Jul 04 2023 at 21:11):

Also, theoretically new_list may not even be a new allocation. It may be the same allocation as list. Depends on if list was unique to begin with and if we can update in place.

Brendan Hansknecht (Jul 04 2023 at 21:13):

By default, new_list, which is saved in the closure, would have a unique refcount. As such, roc would free it after the closure is run. (Though it still may in place update and reuse the allocation instead of freeing).

Brendan Hansknecht (Jul 04 2023 at 21:14):

If the closure was going to be called multiple times, the list captured by the closure would need its refcount incremented before each call to avoid being freed or reused.

Oskar Hahn (Jul 04 2023 at 22:30):

That is interesting. With this information, I am not sure, if my use case is possible.

I want to call roc, receive a closure, copy the closure to the heap and return the pointer to the wasm-runtime. Later, another function calls run_closure with the pointer.

If I understand you both correctly, as soon as the first function returns, the original closure will be freed. Since the allocated data has a refcount of 1, it will also be freed. To solve this, I would have to increment the refcount of all data referenced by the closure by 1. I don't know how to do this.

Would it be possible to tell roc somehow, that I made a copy of the closure and that it should not reduce the refcounter, when the original closure on the stack gets "freed"?

Or do you have another idea, how a task-like feature could work with webassembly?

Folkert de Vries (Jul 04 2023 at 22:54):

once the data is out of roc, freeing the memory is your responsibility. you can just not do it

Folkert de Vries (Jul 04 2023 at 22:54):

Folkert de Vries (Jul 04 2023 at 22:55):

now, for the storing of the closure, I think by far the easiest is to have roc return a Box YourActualReturnType

Folkert de Vries (Jul 04 2023 at 22:56):

Folkert de Vries (Jul 04 2023 at 22:57):

Folkert de Vries (Jul 04 2023 at 22:58):

then the mainforhost_exposed_generic function takes a pointer as an argument and writes the return value into it. usually we'd give a stack pointer, but you can provide a heap pointer (given to you by the allocator) as well

Folkert de Vries (Jul 04 2023 at 22:59):

we're working on better support for Task using glue. it is quite tricky to do it in general though.

Oskar Hahn (Jul 09 2023 at 16:20):

I still don't get when the memory is deallocated. But after I copied the closure_data to a manually allocated memory, everything works. So thank you for your help.

Afterwards I realized that I misunderstood how Tasks work. Now I get that I have to define the effects in a special hosted Effect module. This makes it much easier. I have other questions about this, but I will ask them in a different thread :)

Stream: platform development

Topic: zig layout for a roc struct that contains a function

Oskar Hahn (Jun 30 2023 at 22:52):

Brendan Hansknecht (Jul 01 2023 at 14:45):

Brendan Hansknecht (Jul 01 2023 at 14:47):

Brendan Hansknecht (Jul 01 2023 at 14:48):

Folkert de Vries (Jul 01 2023 at 15:09):

Folkert de Vries (Jul 01 2023 at 15:09):

Brendan Hansknecht (Jul 01 2023 at 16:01):

Brendan Hansknecht (Jul 01 2023 at 16:01):

Folkert de Vries (Jul 01 2023 at 16:02):

Oskar Hahn (Jul 01 2023 at 19:35):

Folkert de Vries (Jul 01 2023 at 19:38):

Folkert de Vries (Jul 01 2023 at 19:39):

Oskar Hahn (Jul 01 2023 at 19:59):

Folkert de Vries (Jul 01 2023 at 20:06):

Oskar Hahn (Jul 01 2023 at 20:16):

Oskar Hahn (Jul 04 2023 at 12:39):

Folkert de Vries (Jul 04 2023 at 12:44):

Folkert de Vries (Jul 04 2023 at 12:44):

Folkert de Vries (Jul 04 2023 at 12:45):

Folkert de Vries (Jul 04 2023 at 12:47):

Oskar Hahn (Jul 04 2023 at 12:47):

Folkert de Vries (Jul 04 2023 at 12:48):

Folkert de Vries (Jul 04 2023 at 12:48):

Oskar Hahn (Jul 04 2023 at 12:49):

Folkert de Vries (Jul 04 2023 at 12:49):

Oskar Hahn (Jul 04 2023 at 12:50):

Folkert de Vries (Jul 04 2023 at 12:50):

Folkert de Vries (Jul 04 2023 at 12:51):

Folkert de Vries (Jul 04 2023 at 12:51):

Folkert de Vries (Jul 04 2023 at 12:53):

Oskar Hahn (Jul 04 2023 at 12:58):

Oskar Hahn (Jul 04 2023 at 12:59):

Folkert de Vries (Jul 04 2023 at 13:02):

Folkert de Vries (Jul 04 2023 at 13:03):

Folkert de Vries (Jul 04 2023 at 13:03):

Folkert de Vries (Jul 04 2023 at 13:03):

Folkert de Vries (Jul 04 2023 at 13:05):

Folkert de Vries (Jul 04 2023 at 13:05):

Oskar Hahn (Jul 04 2023 at 13:06):

Folkert de Vries (Jul 04 2023 at 13:07):

Folkert de Vries (Jul 04 2023 at 13:08):

Oskar Hahn (Jul 04 2023 at 13:10):

Oskar Hahn (Jul 04 2023 at 13:11):

Folkert de Vries (Jul 04 2023 at 13:13):

Folkert de Vries (Jul 04 2023 at 13:13):

Oskar Hahn (Jul 04 2023 at 13:14):

Folkert de Vries (Jul 04 2023 at 13:14):

Folkert de Vries (Jul 04 2023 at 13:15):

Folkert de Vries (Jul 04 2023 at 13:15):

Folkert de Vries (Jul 04 2023 at 13:16):

Folkert de Vries (Jul 04 2023 at 13:16):

Oskar Hahn (Jul 04 2023 at 13:20):

Folkert de Vries (Jul 04 2023 at 13:21):

Folkert de Vries (Jul 04 2023 at 13:23):

Folkert de Vries (Jul 04 2023 at 13:24):

Folkert de Vries (Jul 04 2023 at 13:24):

Folkert de Vries (Jul 04 2023 at 13:24):

Oskar Hahn (Jul 04 2023 at 13:25):

Oskar Hahn (Jul 04 2023 at 14:34):

Folkert de Vries (Jul 04 2023 at 14:41):

Folkert de Vries (Jul 04 2023 at 14:43):

Folkert de Vries (Jul 04 2023 at 14:43):

Oskar Hahn (Jul 04 2023 at 15:02):

Folkert de Vries (Jul 04 2023 at 15:07):

Folkert de Vries (Jul 04 2023 at 15:08):

Oskar Hahn (Jul 04 2023 at 20:19):

Brendan Hansknecht (Jul 04 2023 at 21:08):

Brendan Hansknecht (Jul 04 2023 at 21:08):

Brendan Hansknecht (Jul 04 2023 at 21:09):

Brendan Hansknecht (Jul 04 2023 at 21:11):

Brendan Hansknecht (Jul 04 2023 at 21:13):

Brendan Hansknecht (Jul 04 2023 at 21:14):

Oskar Hahn (Jul 04 2023 at 22:30):

Folkert de Vries (Jul 04 2023 at 22:54):

Folkert de Vries (Jul 04 2023 at 22:54):

Folkert de Vries (Jul 04 2023 at 22:55):

Folkert de Vries (Jul 04 2023 at 22:56):

Folkert de Vries (Jul 04 2023 at 22:56):