Stream: beginners

Topic: Why is my parser causing a stack overflow?


view this post on Zulip Kevin Hovsäter (Feb 23 2025 at 16:34):

I'm building a parser for parsing simple arithmetic expressions. Unfortunately, I'm stuck, as my program just stack overflows when running it. At this point, I think I need pointers.

Here's my parser. Note: It's just a toy for my upcoming talk, so the quality might not be the best.

module [digit]

Parser a := List U8 -> ParseResult a
ParseResult a : Result { value : a, rest : List U8 } Str

run : Parser a, Str -> Result (a, Str) Str
run = |@Parser(parser), input|
    when input |> Str.to_utf8 |> parser is
        Ok({ value, rest }) -> Ok((value, Str.from_utf8(rest) ?? ""))
        Err(err) -> Err(err)

map : Parser a, (a -> b) -> Parser b
map = |@Parser(parser), f|
    @Parser(
        |input|
            parser(input) |> Result.map_ok(|{ value, rest }| { value: f(value), rest }),
    )

expect run(map(digit, |n| n + 1), "123") == Ok(('2', "23"))

lazy : ({} -> Parser a) -> Parser a
lazy = |thunk|
    @Parser(
        |input|
            @Parser(parser) = thunk({})
            parser(input),
    )

zero_or_more : Parser a -> Parser (List a)
zero_or_more = |@Parser(parser)|
    @Parser(
        |input|
            helper = |acc, remaining|
                when parser(remaining) is
                    Ok({ value, rest }) -> helper(List.append(acc, value), rest)
                    Err(_) -> Ok({ value: acc, rest: remaining })
            helper([], input),
    )

expect run(zero_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(zero_or_more(digit), "abc") == Ok(([], "abc"))

one_or_more : Parser a -> Parser (List a)
one_or_more = |parser|
    parser
    |> andThen(zero_or_more(parser))
    |> map(|(head, tail)| List.prepend(tail, head))

expect run(one_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(one_or_more(digit), "abc") == Err("expected '9', got 'a'")

orElse : Parser a, Parser a -> Parser a
orElse = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            when parserA(input) is
                Ok(result) -> Ok(result)
                Err(_) -> parserB(input),
    )

expect run(orElse(digit, symbol('a')), "123") == Ok(('1', "23"))
expect run(orElse(digit, symbol('a')), "abc") == Ok(('a', "bc"))
expect run(orElse(digit, symbol('a')), "") == Err("expected 'a', got EOF")

andThen : Parser a, Parser b -> Parser (a, b)
andThen = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            { value: valueA, rest: restA } = parserA(input)?
            { value: valueB, rest: restB } = parserB(restA)?
            Ok({ value: (valueA, valueB), rest: restB }),
    )

expect run(digit |> andThen(digit), "123") == Ok((('1', '2'), "3"))

andDropLeft : Parser a, Parser b -> Parser b
andDropLeft = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(_, b)| b)

andDropRight : Parser a, Parser b -> Parser a
andDropRight = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(a, _)| a)

digit : Parser U8
digit =
    parsers = List.map(['1', '2', '3', '4', '5', '6', '7', '8', '9'], symbol)
    List.walk(parsers, symbol('0'), orElse)

expect run(digit, "123") == Ok(('1', "23"))
expect run(digit, "abc") == Err("expected '9', got 'a'")
expect run(digit, "") == Err("expected '9', got EOF")

natural : Parser U64
natural =
    one_or_more(digit)
    |> map(|digits| List.walk(digits, 0, |acc, n| acc * 10 + Num.to_u64(n - '0')))

expect run(natural, "123") == Ok((123, ""))
expect run(natural, "1abc") == Ok((1, "abc"))

symbol : U8 -> Parser U8
symbol = |c|
    @Parser(
        |input|
            when input is
                [d, .. as rest] if c == d ->
                    Ok({ value: c, rest })

                [d, ..] ->
                    exp = Str.from_utf8([c]) ?? ""
                    act = Str.from_utf8([d]) ?? ""
                    Err("expected '${exp}', got '${act}'")

                [] ->
                    exp = Str.from_utf8([c]) ?? ""
                    Err("expected '${exp}', got EOF"),
    )

expect run(symbol('a'), "abc") == Ok(('a', "bc"))
expect run(symbol('a'), "bbc") == Err("expected 'a', got 'b'")
expect run(symbol('a'), "") == Err("expected 'a', got EOF")

expr : Parser U64
expr =
    term
    |> andDropRight(symbol('+'))
    |> andThen(lazy(|_| expr))
    |> map(|(x, y)| x + y)
    |> orElse(term)

term : Parser U64
term =
    factor
    |> andDropRight(symbol('*'))
    |> andThen(lazy(|_| term))
    |> map(|(x, y)| x * y)
    |> orElse(factor)

factor : Parser U64
factor = natural

expect run(expr, "1") == Ok((3, ""))

When running it, it immediately errors out with a stack overflow:

thread '<unknown>' has overflowed its stack
fatal runtime error: stack overflow
Abort trap: 6

I assumed this was due to the recursive nature of expr and term, but after adding a lazy combinator the problem persists. I've been looking at this code for hours now and I really can't figure it out. Either I've made some logical mistake in the code or I simply don't understand the problem.

Any help would be much appreciated!

view this post on Zulip Kevin Hovsäter (Feb 23 2025 at 17:26):

I tried rewriting my combinators like so:

expr : Parser U64
expr =
    term
    |> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
    |> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))

term : Parser U64
term =
    factor
    |> andThen(zero_or_more(symbol('*') |> andDropLeft(factor)))
    |> map(|(x, xs)| List.walk(xs, x, |product, y| product * y))

factor : Parser U64
factor = natural |> orElse(symbol('(') |> andDropLeft(expr) |> andDropRight(symbol(')')))

Unfortunately, the result is the same, so I'm out of ideas at this point. :man_shrugging:

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 05:32):

Anyone?

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:42):

I'll take a look, need to rebuild the rust compiler

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 05:43):

Thanks, @Brendan Hansknecht. Really appreciate it.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:48):

Oh, this isn't your app stack overflowing, this is roc compiler unification looping infinitely

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 05:52):

Oh, really? So it’s a compiler bug? I’m actually glad to hear that as I spent hours trying to figure out what I did wrong yesterday.

How likely is it that this can be fixed short term? I’m having a presentation about Roc and parser combinators on Thursday and starting to get a bit stressed out as I don’t have working code yet. :sweat_smile:

If not fixed, can I work around it?

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:53):

How likely is it that this can be fixed short term?

I think the chances are pretty slim. I think this is a bug with recursive lambaset unification and closure captures. Likely needs some sort of boxed break point otherwise will capture an inifinitely large dataset.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:54):

If not fixed, can I work around it?

I would assume there is a workaround, the question is what would it be.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:55):

Sadly due to lambasets, roc's function unification is brittle. Parser combinators (recursive opaque functions with captures) are about the most likely thing to break due to lambaset bugs.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:56):

lambaset bugs are a major part of the reason we are rewriting the compiler in general.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:57):

@Luke Boswell just curious if you have any ideas for a workaround. I know you worked with parser combinators in roc a good bit.

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 05:57):

Got it. Damn it. I assume the new compiler is still work in progress as well?

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 05:57):

Yeah, it is like a few weeks old. It doesn't compile anything yet.

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 06:02):

The overflow happens when I call expr from within factor. I could drop support for parametrised expressions, but it would take away from what I’m trying to explain.

Let’s see if Luke or someone else can think of a workaround, otherwise I’ll have to rewrite this in Elm…

Thanks for help debugging this. :smile:

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 06:05):

Oh, one thing I remember that might help. When defining a new parser, wrap it in a new lamba and @Parser. So instead of

factor : Parser U64
factor = natural

Do:

factor : Parser U64
factor = @Parser(|x| natural(x))

Same with expr and other parsers. That has fixed similar issues before.

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 06:06):

I’ll try that. I also found https://github.com/roc-lang/roc/issues/7381 which seems relevant.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 06:08):

quite possibly

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 06:08):

Could be a bug with how we create joinpoints in mutually tail recursive code.

view this post on Zulip Luke Boswell (Feb 24 2025 at 07:40):

I've got a few minutes now I'll poke at it and see if I can find anything.

view this post on Zulip Luke Boswell (Feb 24 2025 at 07:42):

Did it work up until a point?

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 07:43):

I think it is just the last test an expr that breaks things

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 07:43):

Yes. The current version which is this:

module []

Parser a := List U8 -> ParseResult a
ParseResult a : Result { value : a, rest : List U8 } Str

run : Parser a, Str -> Result (a, Str) Str
run = |@Parser(parser), input|
    when input |> Str.to_utf8 |> parser is
        Ok({ value, rest }) -> Ok((value, Str.from_utf8(rest) ?? ""))
        Err(err) -> Err(err)

map : Parser a, (a -> b) -> Parser b
map = |@Parser(parser), f|
    @Parser(
        |input|
            parser(input) |> Result.map_ok(|{ value, rest }| { value: f(value), rest }),
    )

expect run(map(digit, |n| n + 1), "123") == Ok(('2', "23"))

lazy : ({} -> Parser a) -> Parser a
lazy = |thunk|
    @Parser(
        |input|
            @Parser(parser) = thunk({})
            parser(input),
    )

zero_or_more : Parser a -> Parser (List a)
zero_or_more = |@Parser(parser)|
    @Parser(
        |input|
            helper = |acc, remaining|
                when parser(remaining) is
                    Ok({ value, rest }) ->
                        # TODO: Not sure if this is needed. All parsers consume input.
                        if rest == remaining then
                            Ok({ value: acc, rest: remaining })
                        else
                            helper(List.append(acc, value), rest)

                    Err(_) -> Ok({ value: acc, rest: remaining })
            helper([], input),
    )

expect run(zero_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(zero_or_more(digit), "abc") == Ok(([], "abc"))

one_or_more : Parser a -> Parser (List a)
one_or_more = |parser|
    parser
    |> andThen(zero_or_more(parser))
    |> map(|(head, tail)| List.prepend(tail, head))

expect run(one_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(one_or_more(digit), "abc") == Err("expected '9', got 'a'")

orElse : Parser a, Parser a -> Parser a
orElse = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            when parserA(input) is
                Ok(result) -> Ok(result)
                Err(_) -> parserB(input),
    )

expect run(orElse(digit, symbol('a')), "123") == Ok(('1', "23"))
expect run(orElse(digit, symbol('a')), "abc") == Ok(('a', "bc"))
expect run(orElse(digit, symbol('a')), "") == Err("expected 'a', got EOF")

andThen : Parser a, Parser b -> Parser (a, b)
andThen = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            { value: valueA, rest: restA } = parserA(input)?
            { value: valueB, rest: restB } = parserB(restA)?
            Ok({ value: (valueA, valueB), rest: restB }),
    )

expect run(digit |> andThen(digit), "123") == Ok((('1', '2'), "3"))

andDropLeft : Parser a, Parser b -> Parser b
andDropLeft = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(_, b)| b)

andDropRight : Parser a, Parser b -> Parser a
andDropRight = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(a, _)| a)

digit : Parser U8
digit =
    parsers = List.map(['1', '2', '3', '4', '5', '6', '7', '8', '9'], symbol)
    List.walk(parsers, symbol('0'), orElse)

expect run(digit, "123") == Ok(('1', "23"))
expect run(digit, "abc") == Err("expected '9', got 'a'")
expect run(digit, "") == Err("expected '9', got EOF")

natural : Parser U64
natural =
    one_or_more(digit)
    |> map(|digits| List.walk(digits, 0, |acc, n| acc * 10 + Num.to_u64(n - '0')))

expect run(natural, "123") == Ok((123, ""))
expect run(natural, "1abc") == Ok((1, "abc"))

symbol : U8 -> Parser U8
symbol = |c|
    @Parser(
        |input|
            when input is
                [d, .. as rest] if c == d ->
                    Ok({ value: c, rest })

                [d, ..] ->
                    exp = Str.from_utf8([c]) ?? ""
                    act = Str.from_utf8([d]) ?? ""
                    Err("expected '${exp}', got '${act}'")

                [] ->
                    exp = Str.from_utf8([c]) ?? ""
                    Err("expected '${exp}', got EOF"),
    )

expect run(symbol('a'), "abc") == Ok(('a', "bc"))
expect run(symbol('a'), "bbc") == Err("expected 'a', got 'b'")
expect run(symbol('a'), "") == Err("expected 'a', got EOF")

expr : Parser U64
expr =
    term
    |> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
    |> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))

term : Parser U64
term =
    factor
    |> andThen(zero_or_more(symbol('*') |> andDropLeft(factor)))
    |> map(|(x, xs)| List.walk(xs, x, |product, y| product * y))

factor : Parser U64
factor =
    natural
    |> orElse(
        symbol('(')
        |> andDropLeft(expr)
        |> andDropRight(symbol(')')),
    )

expect run(expr, "1+2") == Ok((3, ""))

works as long as I kill the orElse branch in factor.

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 07:44):

So it's the fact that I call expr from factor.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 07:46):

        |> andDropLeft(
            @Parser(
                |input|
                    @Parser(fn) = expr
                    fn(input),
            ),
        )

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 07:46):

I think that fixes it

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 07:48):

Can also apply it at the definition site of expr:

expr : Parser U64
expr =
    @Parser(
        |input|
            @Parser(fn) =
                term
                |> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
                |> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))
            fn(input),
    )

view this post on Zulip Luke Boswell (Feb 24 2025 at 07:48):

Fixed it for me

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 07:51):

That does indeed fix it! I assume my version should work, right? So I can use that in the slide for teaching purposes but then use this "workaround" for the sake of making it work.

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 07:52):

Yes

view this post on Zulip Brendan Hansknecht (Feb 24 2025 at 07:53):

To my understanding, this splits up the lambaset into two different lambasets that both unify happily. If lambasets weren't broken, this all should happily unify into a single lambdaset.

view this post on Zulip Luke Boswell (Feb 24 2025 at 07:53):

This is pretty neat. I like how you've put this together :smiley:

view this post on Zulip Luke Boswell (Feb 24 2025 at 07:54):

Sorry it hasn't worked as smoothly.

view this post on Zulip Kevin Hovsäter (Feb 24 2025 at 07:56):

Thanks! It's been a learning experience for me as well. :sweat_smile: My goal is to show how you can create a handful of small combinators and combine them into something that correctly parses mathematical expressions such as 1 + 2 * 3 * (1 + 5).

While my parser isn't optimised at all, I think it does a good job in showing off how powerful parser combinators are.


Last updated: Jul 05 2025 at 12:14 UTC