I'm building a parser for parsing simple arithmetic expressions. Unfortunately, I'm stuck, as my program just stack overflows when running it. At this point, I think I need pointers.
Here's my parser. Note: It's just a toy for my upcoming talk, so the quality might not be the best.
module [digit]
Parser a := List U8 -> ParseResult a
ParseResult a : Result { value : a, rest : List U8 } Str
run : Parser a, Str -> Result (a, Str) Str
run = |@Parser(parser), input|
when input |> Str.to_utf8 |> parser is
Ok({ value, rest }) -> Ok((value, Str.from_utf8(rest) ?? ""))
Err(err) -> Err(err)
map : Parser a, (a -> b) -> Parser b
map = |@Parser(parser), f|
@Parser(
|input|
parser(input) |> Result.map_ok(|{ value, rest }| { value: f(value), rest }),
)
expect run(map(digit, |n| n + 1), "123") == Ok(('2', "23"))
lazy : ({} -> Parser a) -> Parser a
lazy = |thunk|
@Parser(
|input|
@Parser(parser) = thunk({})
parser(input),
)
zero_or_more : Parser a -> Parser (List a)
zero_or_more = |@Parser(parser)|
@Parser(
|input|
helper = |acc, remaining|
when parser(remaining) is
Ok({ value, rest }) -> helper(List.append(acc, value), rest)
Err(_) -> Ok({ value: acc, rest: remaining })
helper([], input),
)
expect run(zero_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(zero_or_more(digit), "abc") == Ok(([], "abc"))
one_or_more : Parser a -> Parser (List a)
one_or_more = |parser|
parser
|> andThen(zero_or_more(parser))
|> map(|(head, tail)| List.prepend(tail, head))
expect run(one_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(one_or_more(digit), "abc") == Err("expected '9', got 'a'")
orElse : Parser a, Parser a -> Parser a
orElse = |@Parser(parserA), @Parser(parserB)|
@Parser(
|input|
when parserA(input) is
Ok(result) -> Ok(result)
Err(_) -> parserB(input),
)
expect run(orElse(digit, symbol('a')), "123") == Ok(('1', "23"))
expect run(orElse(digit, symbol('a')), "abc") == Ok(('a', "bc"))
expect run(orElse(digit, symbol('a')), "") == Err("expected 'a', got EOF")
andThen : Parser a, Parser b -> Parser (a, b)
andThen = |@Parser(parserA), @Parser(parserB)|
@Parser(
|input|
{ value: valueA, rest: restA } = parserA(input)?
{ value: valueB, rest: restB } = parserB(restA)?
Ok({ value: (valueA, valueB), rest: restB }),
)
expect run(digit |> andThen(digit), "123") == Ok((('1', '2'), "3"))
andDropLeft : Parser a, Parser b -> Parser b
andDropLeft = |parserA, parserB|
parserA |> andThen(parserB) |> map(|(_, b)| b)
andDropRight : Parser a, Parser b -> Parser a
andDropRight = |parserA, parserB|
parserA |> andThen(parserB) |> map(|(a, _)| a)
digit : Parser U8
digit =
parsers = List.map(['1', '2', '3', '4', '5', '6', '7', '8', '9'], symbol)
List.walk(parsers, symbol('0'), orElse)
expect run(digit, "123") == Ok(('1', "23"))
expect run(digit, "abc") == Err("expected '9', got 'a'")
expect run(digit, "") == Err("expected '9', got EOF")
natural : Parser U64
natural =
one_or_more(digit)
|> map(|digits| List.walk(digits, 0, |acc, n| acc * 10 + Num.to_u64(n - '0')))
expect run(natural, "123") == Ok((123, ""))
expect run(natural, "1abc") == Ok((1, "abc"))
symbol : U8 -> Parser U8
symbol = |c|
@Parser(
|input|
when input is
[d, .. as rest] if c == d ->
Ok({ value: c, rest })
[d, ..] ->
exp = Str.from_utf8([c]) ?? ""
act = Str.from_utf8([d]) ?? ""
Err("expected '${exp}', got '${act}'")
[] ->
exp = Str.from_utf8([c]) ?? ""
Err("expected '${exp}', got EOF"),
)
expect run(symbol('a'), "abc") == Ok(('a', "bc"))
expect run(symbol('a'), "bbc") == Err("expected 'a', got 'b'")
expect run(symbol('a'), "") == Err("expected 'a', got EOF")
expr : Parser U64
expr =
term
|> andDropRight(symbol('+'))
|> andThen(lazy(|_| expr))
|> map(|(x, y)| x + y)
|> orElse(term)
term : Parser U64
term =
factor
|> andDropRight(symbol('*'))
|> andThen(lazy(|_| term))
|> map(|(x, y)| x * y)
|> orElse(factor)
factor : Parser U64
factor = natural
expect run(expr, "1") == Ok((3, ""))
When running it, it immediately errors out with a stack overflow:
thread '<unknown>' has overflowed its stack
fatal runtime error: stack overflow
Abort trap: 6
I assumed this was due to the recursive nature of expr
and term
, but after adding a lazy
combinator the problem persists. I've been looking at this code for hours now and I really can't figure it out. Either I've made some logical mistake in the code or I simply don't understand the problem.
Any help would be much appreciated!
I tried rewriting my combinators like so:
expr : Parser U64
expr =
term
|> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
|> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))
term : Parser U64
term =
factor
|> andThen(zero_or_more(symbol('*') |> andDropLeft(factor)))
|> map(|(x, xs)| List.walk(xs, x, |product, y| product * y))
factor : Parser U64
factor = natural |> orElse(symbol('(') |> andDropLeft(expr) |> andDropRight(symbol(')')))
Unfortunately, the result is the same, so I'm out of ideas at this point. :man_shrugging:
Anyone?
I'll take a look, need to rebuild the rust compiler
Thanks, @Brendan Hansknecht. Really appreciate it.
Oh, this isn't your app stack overflowing, this is roc compiler unification looping infinitely
Oh, really? So it’s a compiler bug? I’m actually glad to hear that as I spent hours trying to figure out what I did wrong yesterday.
How likely is it that this can be fixed short term? I’m having a presentation about Roc and parser combinators on Thursday and starting to get a bit stressed out as I don’t have working code yet. :sweat_smile:
If not fixed, can I work around it?
How likely is it that this can be fixed short term?
I think the chances are pretty slim. I think this is a bug with recursive lambaset unification and closure captures. Likely needs some sort of boxed break point otherwise will capture an inifinitely large dataset.
If not fixed, can I work around it?
I would assume there is a workaround, the question is what would it be.
Sadly due to lambasets, roc's function unification is brittle. Parser combinators (recursive opaque functions with captures) are about the most likely thing to break due to lambaset bugs.
lambaset bugs are a major part of the reason we are rewriting the compiler in general.
@Luke Boswell just curious if you have any ideas for a workaround. I know you worked with parser combinators in roc a good bit.
Got it. Damn it. I assume the new compiler is still work in progress as well?
Yeah, it is like a few weeks old. It doesn't compile anything yet.
The overflow happens when I call expr
from within factor
. I could drop support for parametrised expressions, but it would take away from what I’m trying to explain.
Let’s see if Luke or someone else can think of a workaround, otherwise I’ll have to rewrite this in Elm…
Thanks for help debugging this. :smile:
Oh, one thing I remember that might help. When defining a new parser, wrap it in a new lamba and @Parser
. So instead of
factor : Parser U64
factor = natural
Do:
factor : Parser U64
factor = @Parser(|x| natural(x))
Same with expr
and other parsers. That has fixed similar issues before.
I’ll try that. I also found https://github.com/roc-lang/roc/issues/7381 which seems relevant.
quite possibly
Could be a bug with how we create joinpoints in mutually tail recursive code.
I've got a few minutes now I'll poke at it and see if I can find anything.
Did it work up until a point?
I think it is just the last test an expr
that breaks things
Yes. The current version which is this:
module []
Parser a := List U8 -> ParseResult a
ParseResult a : Result { value : a, rest : List U8 } Str
run : Parser a, Str -> Result (a, Str) Str
run = |@Parser(parser), input|
when input |> Str.to_utf8 |> parser is
Ok({ value, rest }) -> Ok((value, Str.from_utf8(rest) ?? ""))
Err(err) -> Err(err)
map : Parser a, (a -> b) -> Parser b
map = |@Parser(parser), f|
@Parser(
|input|
parser(input) |> Result.map_ok(|{ value, rest }| { value: f(value), rest }),
)
expect run(map(digit, |n| n + 1), "123") == Ok(('2', "23"))
lazy : ({} -> Parser a) -> Parser a
lazy = |thunk|
@Parser(
|input|
@Parser(parser) = thunk({})
parser(input),
)
zero_or_more : Parser a -> Parser (List a)
zero_or_more = |@Parser(parser)|
@Parser(
|input|
helper = |acc, remaining|
when parser(remaining) is
Ok({ value, rest }) ->
# TODO: Not sure if this is needed. All parsers consume input.
if rest == remaining then
Ok({ value: acc, rest: remaining })
else
helper(List.append(acc, value), rest)
Err(_) -> Ok({ value: acc, rest: remaining })
helper([], input),
)
expect run(zero_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(zero_or_more(digit), "abc") == Ok(([], "abc"))
one_or_more : Parser a -> Parser (List a)
one_or_more = |parser|
parser
|> andThen(zero_or_more(parser))
|> map(|(head, tail)| List.prepend(tail, head))
expect run(one_or_more(digit), "123") == Ok((['1', '2', '3'], ""))
expect run(one_or_more(digit), "abc") == Err("expected '9', got 'a'")
orElse : Parser a, Parser a -> Parser a
orElse = |@Parser(parserA), @Parser(parserB)|
@Parser(
|input|
when parserA(input) is
Ok(result) -> Ok(result)
Err(_) -> parserB(input),
)
expect run(orElse(digit, symbol('a')), "123") == Ok(('1', "23"))
expect run(orElse(digit, symbol('a')), "abc") == Ok(('a', "bc"))
expect run(orElse(digit, symbol('a')), "") == Err("expected 'a', got EOF")
andThen : Parser a, Parser b -> Parser (a, b)
andThen = |@Parser(parserA), @Parser(parserB)|
@Parser(
|input|
{ value: valueA, rest: restA } = parserA(input)?
{ value: valueB, rest: restB } = parserB(restA)?
Ok({ value: (valueA, valueB), rest: restB }),
)
expect run(digit |> andThen(digit), "123") == Ok((('1', '2'), "3"))
andDropLeft : Parser a, Parser b -> Parser b
andDropLeft = |parserA, parserB|
parserA |> andThen(parserB) |> map(|(_, b)| b)
andDropRight : Parser a, Parser b -> Parser a
andDropRight = |parserA, parserB|
parserA |> andThen(parserB) |> map(|(a, _)| a)
digit : Parser U8
digit =
parsers = List.map(['1', '2', '3', '4', '5', '6', '7', '8', '9'], symbol)
List.walk(parsers, symbol('0'), orElse)
expect run(digit, "123") == Ok(('1', "23"))
expect run(digit, "abc") == Err("expected '9', got 'a'")
expect run(digit, "") == Err("expected '9', got EOF")
natural : Parser U64
natural =
one_or_more(digit)
|> map(|digits| List.walk(digits, 0, |acc, n| acc * 10 + Num.to_u64(n - '0')))
expect run(natural, "123") == Ok((123, ""))
expect run(natural, "1abc") == Ok((1, "abc"))
symbol : U8 -> Parser U8
symbol = |c|
@Parser(
|input|
when input is
[d, .. as rest] if c == d ->
Ok({ value: c, rest })
[d, ..] ->
exp = Str.from_utf8([c]) ?? ""
act = Str.from_utf8([d]) ?? ""
Err("expected '${exp}', got '${act}'")
[] ->
exp = Str.from_utf8([c]) ?? ""
Err("expected '${exp}', got EOF"),
)
expect run(symbol('a'), "abc") == Ok(('a', "bc"))
expect run(symbol('a'), "bbc") == Err("expected 'a', got 'b'")
expect run(symbol('a'), "") == Err("expected 'a', got EOF")
expr : Parser U64
expr =
term
|> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
|> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))
term : Parser U64
term =
factor
|> andThen(zero_or_more(symbol('*') |> andDropLeft(factor)))
|> map(|(x, xs)| List.walk(xs, x, |product, y| product * y))
factor : Parser U64
factor =
natural
|> orElse(
symbol('(')
|> andDropLeft(expr)
|> andDropRight(symbol(')')),
)
expect run(expr, "1+2") == Ok((3, ""))
works as long as I kill the orElse
branch in factor
.
So it's the fact that I call expr
from factor
.
|> andDropLeft(
@Parser(
|input|
@Parser(fn) = expr
fn(input),
),
)
I think that fixes it
Can also apply it at the definition site of expr
:
expr : Parser U64
expr =
@Parser(
|input|
@Parser(fn) =
term
|> andThen(zero_or_more(symbol('+') |> andDropLeft(term)))
|> map(|(x, xs)| List.walk(xs, x, |sum, y| sum + y))
fn(input),
)
Fixed it for me
That does indeed fix it! I assume my version should work, right? So I can use that in the slide for teaching purposes but then use this "workaround" for the sake of making it work.
Yes
To my understanding, this splits up the lambaset into two different lambasets that both unify happily. If lambasets weren't broken, this all should happily unify into a single lambdaset.
This is pretty neat. I like how you've put this together :smiley:
Sorry it hasn't worked as smoothly.
Thanks! It's been a learning experience for me as well. :sweat_smile: My goal is to show how you can create a handful of small combinators and combine them into something that correctly parses mathematical expressions such as 1 + 2 * 3 * (1 + 5)
.
While my parser isn't optimised at all, I think it does a good job in showing off how powerful parser combinators are.
Last updated: Jul 05 2025 at 12:14 UTC