Why is my parser causing a stack overflow? · beginners

I'm building a parser for parsing simple arithmetic expressions. Unfortunately, I'm stuck, as my program just stack overflows when running it. At this point, I think I need pointers.

Here's my parser. Note: It's just a toy for my upcoming talk, so the quality might not be the best.

module [digit]

Parser a := List U8 -> ParseResult a
ParseResult a : Result { value : a, rest : List U8 } Str

run : Parser a, Str -> Result (a, Str) Str
run = |@Parser(parser), input|
    when input |> Str.to_utf8 |> parser is
        Ok({ value, rest }) -> Ok((value, Str.from_utf8(rest) ?? ""))
        Err(err) -> Err(err)

map : Parser a, (a -> b) -> Parser b
map = |@Parser(parser), f|
    @Parser(
        |input|
            parser(input) |> Result.map_ok(|{ value, rest }| { value: f(value), rest }),
    )

expect run(map(digit, |n| n + 1), "123") == Ok(('2', "23"))

lazy : ({} -> Parser a) -> Parser a
lazy = |thunk|
    @Parser(
        |input|
            @Parser(parser) = thunk({})
            parser(input),
    )

zero_or_more : Parser a -> Parser (List a)
zero_or_more = |@Parser(parser)|
    @Parser(
        |input|
            helper = |acc, remaining|
                when parser(remaining) is
                    Ok({ value, rest }) -> helper(List.append(acc, value), rest)
                    Err(_) -> Ok({ value: acc, rest: remaining })
            helper([], input),
    )

expect run(zero_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(zero_or_more(digit), "abc") == Ok(([], "abc"))

one_or_more : Parser a -> Parser (List a)
one_or_more = |parser|
    parser
    |> andThen(zero_or_more(parser))
    |> map(|(head, tail)| List.prepend(tail, head))

expect run(one_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(one_or_more(digit), "abc") == Err("expected '9', got 'a'")

orElse : Parser a, Parser a -> Parser a
orElse = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            when parserA(input) is
                Ok(result) -> Ok(result)
                Err(_) -> parserB(input),
    )

expect run(orElse(digit, symbol('a')), "123") == Ok(('1', "23"))
expect run(orElse(digit, symbol('a')), "abc") == Ok(('a', "bc"))
expect run(orElse(digit, symbol('a')), "") == Err("expected 'a', got EOF")

andThen : Parser a, Parser b -> Parser (a, b)
andThen = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            { value: valueA, rest: restA } = parserA(input)?
            { value: valueB, rest: restB } = parserB(restA)?
            Ok({ value: (valueA, valueB), rest: restB }),
    )

expect run(digit |> andThen(digit), "123") == Ok((('1', '2'), "3"))

andDropLeft : Parser a, Parser b -> Parser b
andDropLeft = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(_, b)| b)

andDropRight : Parser a, Parser b -> Parser a
andDropRight = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(a, _)| a)

digit : Parser U8
digit =
    parsers = List.map(['1', '2', '3', '4', '5', '6', '7', '8', '9'], symbol)
    List.walk(parsers, symbol('0'), orElse)

expect run(digit, "123") == Ok(('1', "23"))
expect run(digit, "abc") == Err("expected '9', got 'a'")
expect run(digit, "") == Err("expected '9', got EOF")

natural : Parser U64
natural =
    one_or_more(digit)
    |> map(|digits| List.walk(digits, 0, |acc, n| acc * 10 + Num.to_u64(n - '0')))

expect run(natural, "123") == Ok((123, ""))
expect run(natural, "1abc") == Ok((1, "abc"))

symbol : U8 -> Parser U8
symbol = |c|
    @Parser(
        |input|
            when input is
                [d, .. as rest] if c == d ->
                    Ok({ value: c, rest })

                [d, ..] ->
                    exp = Str.from_utf8([c]) ?? ""
                    act = Str.from_utf8([d]) ?? ""
                    Err("expected '${exp}', got '${act}'")

                [] ->
                    exp = Str.from_utf8([c]) ?? ""
                    Err("expected '${exp}', got EOF"),
    )

expect run(symbol('a'), "abc") == Ok(('a', "bc"))
expect run(symbol('a'), "bbc") == Err("expected 'a', got 'b'")
expect run(symbol('a'), "") == Err("expected 'a', got EOF")

expr : Parser U64
expr =
    term
    |> andDropRight(symbol('+'))
    |> andThen(lazy(|_| expr))
    |> map(|(x, y)| x + y)
    |> orElse(term)

term : Parser U64
term =
    factor
    |> andDropRight(symbol('*'))
    |> andThen(lazy(|_| term))
    |> map(|(x, y)| x * y)
    |> orElse(factor)

factor : Parser U64
factor = natural

expect run(expr, "1") == Ok((3, ""))

thread '<unknown>' has overflowed its stack
fatal runtime error: stack overflow
Abort trap: 6

I assumed this was due to the recursive nature of expr and term, but after adding a lazy combinator the problem persists. I've been looking at this code for hours now and I really can't figure it out. Either I've made some logical mistake in the code or I simply don't understand the problem.

Kevin Hovsäter (Feb 23 2025 at 17:26):

expr : Parser U64
expr =
    term
    |> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
    |> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))

term : Parser U64
term =
    factor
    |> andThen(zero_or_more(symbol('*') |> andDropLeft(factor)))
    |> map(|(x, xs)| List.walk(xs, x, |product, y| product * y))

factor : Parser U64
factor = natural |> orElse(symbol('(') |> andDropLeft(expr) |> andDropRight(symbol(')')))

Unfortunately, the result is the same, so I'm out of ideas at this point. :man_shrugging:

Kevin Hovsäter (Feb 24 2025 at 05:32):

Brendan Hansknecht (Feb 24 2025 at 05:42):

Kevin Hovsäter (Feb 24 2025 at 05:43):

Brendan Hansknecht (Feb 24 2025 at 05:48):

Oh, this isn't your app stack overflowing, this is roc compiler unification looping infinitely

Kevin Hovsäter (Feb 24 2025 at 05:52):

Oh, really? So it’s a compiler bug? I’m actually glad to hear that as I spent hours trying to figure out what I did wrong yesterday.

How likely is it that this can be fixed short term? I’m having a presentation about Roc and parser combinators on Thursday and starting to get a bit stressed out as I don’t have working code yet. :sweat_smile:

Brendan Hansknecht (Feb 24 2025 at 05:53):

I think the chances are pretty slim. I think this is a bug with recursive lambaset unification and closure captures. Likely needs some sort of boxed break point otherwise will capture an inifinitely large dataset.

Brendan Hansknecht (Feb 24 2025 at 05:54):

Brendan Hansknecht (Feb 24 2025 at 05:55):

Sadly due to lambasets, roc's function unification is brittle. Parser combinators (recursive opaque functions with captures) are about the most likely thing to break due to lambaset bugs.

Brendan Hansknecht (Feb 24 2025 at 05:56):

lambaset bugs are a major part of the reason we are rewriting the compiler in general.

Brendan Hansknecht (Feb 24 2025 at 05:57):

@Luke Boswell just curious if you have any ideas for a workaround. I know you worked with parser combinators in roc a good bit.

Kevin Hovsäter (Feb 24 2025 at 05:57):

Brendan Hansknecht (Feb 24 2025 at 05:57):

Kevin Hovsäter (Feb 24 2025 at 06:02):

The overflow happens when I call expr from within factor. I could drop support for parametrised expressions, but it would take away from what I’m trying to explain.

Let’s see if Luke or someone else can think of a workaround, otherwise I’ll have to rewrite this in Elm…

Brendan Hansknecht (Feb 24 2025 at 06:05):

Oh, one thing I remember that might help. When defining a new parser, wrap it in a new lamba and @Parser. So instead of

factor : Parser U64
factor = natural

factor : Parser U64
factor = @Parser(|x| natural(x))

Kevin Hovsäter (Feb 24 2025 at 06:06):

Brendan Hansknecht (Feb 24 2025 at 06:08):

Luke Boswell (Feb 24 2025 at 07:40):

Luke Boswell (Feb 24 2025 at 07:42):

Brendan Hansknecht (Feb 24 2025 at 07:43):

Kevin Hovsäter (Feb 24 2025 at 07:43):

module []

Parser a := List U8 -> ParseResult a
ParseResult a : Result { value : a, rest : List U8 } Str

run : Parser a, Str -> Result (a, Str) Str
run = |@Parser(parser), input|
    when input |> Str.to_utf8 |> parser is
        Ok({ value, rest }) -> Ok((value, Str.from_utf8(rest) ?? ""))
        Err(err) -> Err(err)

map : Parser a, (a -> b) -> Parser b
map = |@Parser(parser), f|
    @Parser(
        |input|
            parser(input) |> Result.map_ok(|{ value, rest }| { value: f(value), rest }),
    )

expect run(map(digit, |n| n + 1), "123") == Ok(('2', "23"))

lazy : ({} -> Parser a) -> Parser a
lazy = |thunk|
    @Parser(
        |input|
            @Parser(parser) = thunk({})
            parser(input),
    )

zero_or_more : Parser a -> Parser (List a)
zero_or_more = |@Parser(parser)|
    @Parser(
        |input|
            helper = |acc, remaining|
                when parser(remaining) is
                    Ok({ value, rest }) ->
                        # TODO: Not sure if this is needed. All parsers consume input.
                        if rest == remaining then
                            Ok({ value: acc, rest: remaining })
                        else
                            helper(List.append(acc, value), rest)

                    Err(_) -> Ok({ value: acc, rest: remaining })
            helper([], input),
    )

expect run(zero_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(zero_or_more(digit), "abc") == Ok(([], "abc"))

one_or_more : Parser a -> Parser (List a)
one_or_more = |parser|
    parser
    |> andThen(zero_or_more(parser))
    |> map(|(head, tail)| List.prepend(tail, head))

expect run(one_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(one_or_more(digit), "abc") == Err("expected '9', got 'a'")

orElse : Parser a, Parser a -> Parser a
orElse = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            when parserA(input) is
                Ok(result) -> Ok(result)
                Err(_) -> parserB(input),
    )

expect run(orElse(digit, symbol('a')), "123") == Ok(('1', "23"))
expect run(orElse(digit, symbol('a')), "abc") == Ok(('a', "bc"))
expect run(orElse(digit, symbol('a')), "") == Err("expected 'a', got EOF")

andThen : Parser a, Parser b -> Parser (a, b)
andThen = |@Parser(parserA), @Parser(parserB)|
    @Parser(
        |input|
            { value: valueA, rest: restA } = parserA(input)?
            { value: valueB, rest: restB } = parserB(restA)?
            Ok({ value: (valueA, valueB), rest: restB }),
    )

expect run(digit |> andThen(digit), "123") == Ok((('1', '2'), "3"))

andDropLeft : Parser a, Parser b -> Parser b
andDropLeft = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(_, b)| b)

andDropRight : Parser a, Parser b -> Parser a
andDropRight = |parserA, parserB|
    parserA |> andThen(parserB) |> map(|(a, _)| a)

digit : Parser U8
digit =
    parsers = List.map(['1', '2', '3', '4', '5', '6', '7', '8', '9'], symbol)
    List.walk(parsers, symbol('0'), orElse)

expect run(digit, "123") == Ok(('1', "23"))
expect run(digit, "abc") == Err("expected '9', got 'a'")
expect run(digit, "") == Err("expected '9', got EOF")

natural : Parser U64
natural =
    one_or_more(digit)
    |> map(|digits| List.walk(digits, 0, |acc, n| acc * 10 + Num.to_u64(n - '0')))

expect run(natural, "123") == Ok((123, ""))
expect run(natural, "1abc") == Ok((1, "abc"))

symbol : U8 -> Parser U8
symbol = |c|
    @Parser(
        |input|
            when input is
                [d, .. as rest] if c == d ->
                    Ok({ value: c, rest })

                [d, ..] ->
                    exp = Str.from_utf8([c]) ?? ""
                    act = Str.from_utf8([d]) ?? ""
                    Err("expected '${exp}', got '${act}'")

                [] ->
                    exp = Str.from_utf8([c]) ?? ""
                    Err("expected '${exp}', got EOF"),
    )

expect run(symbol('a'), "abc") == Ok(('a', "bc"))
expect run(symbol('a'), "bbc") == Err("expected 'a', got 'b'")
expect run(symbol('a'), "") == Err("expected 'a', got EOF")

expr : Parser U64
expr =
    term
    |> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
    |> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))

term : Parser U64
term =
    factor
    |> andThen(zero_or_more(symbol('*') |> andDropLeft(factor)))
    |> map(|(x, xs)| List.walk(xs, x, |product, y| product * y))

factor : Parser U64
factor =
    natural
    |> orElse(
        symbol('(')
        |> andDropLeft(expr)
        |> andDropRight(symbol(')')),
    )

expect run(expr, "1+2") == Ok((3, ""))

Kevin Hovsäter (Feb 24 2025 at 07:44):

Brendan Hansknecht (Feb 24 2025 at 07:46):

        |> andDropLeft(
            @Parser(
                |input|
                    @Parser(fn) = expr
                    fn(input),
            ),
        )

Brendan Hansknecht (Feb 24 2025 at 07:46):

Brendan Hansknecht (Feb 24 2025 at 07:48):

expr : Parser U64
expr =
    @Parser(
        |input|
            @Parser(fn) =
                term
                |> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
                |> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))
            fn(input),
    )

Luke Boswell (Feb 24 2025 at 07:48):

Kevin Hovsäter (Feb 24 2025 at 07:51):

That does indeed fix it! I assume my version should work, right? So I can use that in the slide for teaching purposes but then use this "workaround" for the sake of making it work.

Brendan Hansknecht (Feb 24 2025 at 07:52):

Brendan Hansknecht (Feb 24 2025 at 07:53):

To my understanding, this splits up the lambaset into two different lambasets that both unify happily. If lambasets weren't broken, this all should happily unify into a single lambdaset.

Luke Boswell (Feb 24 2025 at 07:53):

Luke Boswell (Feb 24 2025 at 07:54):

Kevin Hovsäter (Feb 24 2025 at 07:56):

Thanks! It's been a learning experience for me as well. :sweat_smile: My goal is to show how you can create a handful of small combinators and combine them into something that correctly parses mathematical expressions such as 1 + 2 * 3 * (1 + 5).

While my parser isn't optimised at all, I think it does a good job in showing off how powerful parser combinators are.

Stream: beginners

Topic: Why is my parser causing a stack overflow?

Kevin Hovsäter (Feb 23 2025 at 16:34):

Kevin Hovsäter (Feb 23 2025 at 17:26):

Kevin Hovsäter (Feb 24 2025 at 05:32):

Brendan Hansknecht (Feb 24 2025 at 05:42):

Kevin Hovsäter (Feb 24 2025 at 05:43):

Brendan Hansknecht (Feb 24 2025 at 05:48):

Kevin Hovsäter (Feb 24 2025 at 05:52):

Brendan Hansknecht (Feb 24 2025 at 05:53):

Brendan Hansknecht (Feb 24 2025 at 05:54):

Brendan Hansknecht (Feb 24 2025 at 05:55):

Brendan Hansknecht (Feb 24 2025 at 05:56):

Brendan Hansknecht (Feb 24 2025 at 05:57):

Kevin Hovsäter (Feb 24 2025 at 05:57):

Brendan Hansknecht (Feb 24 2025 at 05:57):

Kevin Hovsäter (Feb 24 2025 at 06:02):

Brendan Hansknecht (Feb 24 2025 at 06:05):

Kevin Hovsäter (Feb 24 2025 at 06:06):

Brendan Hansknecht (Feb 24 2025 at 06:08):

Brendan Hansknecht (Feb 24 2025 at 06:08):

Luke Boswell (Feb 24 2025 at 07:40):

Luke Boswell (Feb 24 2025 at 07:42):

Brendan Hansknecht (Feb 24 2025 at 07:43):

Kevin Hovsäter (Feb 24 2025 at 07:43):

Kevin Hovsäter (Feb 24 2025 at 07:44):

Brendan Hansknecht (Feb 24 2025 at 07:46):

Brendan Hansknecht (Feb 24 2025 at 07:46):

Brendan Hansknecht (Feb 24 2025 at 07:48):

Luke Boswell (Feb 24 2025 at 07:48):

Kevin Hovsäter (Feb 24 2025 at 07:51):

Brendan Hansknecht (Feb 24 2025 at 07:52):

Brendan Hansknecht (Feb 24 2025 at 07:53):

Luke Boswell (Feb 24 2025 at 07:53):

Luke Boswell (Feb 24 2025 at 07:54):

Kevin Hovsäter (Feb 24 2025 at 07:56):