From b9741e87bd2b343e7f26197f59fe58f20659f7ec Mon Sep 17 00:00:00 2001 From: Joe Groocock Date: Tue, 28 Sep 2021 22:40:49 +0100 Subject: [PATCH] lang: interpolate: Fix string interpolation of dollar symbols Dollar symbols failed to parse unless followed by a non-brace, non-dollar, non-EOF token, causing expected tests to fail. This simplifies the rules to allow the remaining tests to succeed. Fix and reinstate the final few failing tests, and add another. Allow any escape sequence to be matched so that invalid sequences produce a meaningful error message instead of a generic "cannot parse": ast: interpolate: interpolating: V: \? unknown escape sequence: \? Tidy the related Makefile rule for generating the ragel parser. Signed-off-by: Joe Groocock --- lang/Makefile | 9 +++---- lang/interpolate/parse.rl | 19 +++++--------- .../TestAstFunc2/escaping1.output | 5 ++++ .../TestAstFunc2/escaping1/main.mcl | 26 +++++++------------ .../TestAstFunc2/escaping4.output | 2 +- .../TestAstFunc2/escaping5.output | 2 +- 6 files changed, 26 insertions(+), 37 deletions(-) diff --git a/lang/Makefile b/lang/Makefile index 1bf5e42c..35217c71 100644 --- a/lang/Makefile +++ b/lang/Makefile @@ -47,13 +47,10 @@ endif interpolate/parse.generated.go: interpolate/parse.rl @echo "Generating: interpolation..." - ragel -Z -G2 -o interpolate/parse.generated.go interpolate/parse.rl - #@ROOT="$$( cd "$$( dirname "$${BASH_SOURCE[0]}" )" && cd .. && pwd )" && $$ROOT/misc/header.sh 'interpolate/parse.generated.go' - # XXX: I have no idea why I need to sed twice. 
I give up :P + ragel -Z -G2 -o $@ $< # remove the ragel header so our header test passes - @sed -i -e "1d" 'interpolate/parse.generated.go' - @sed -i -e "1d" 'interpolate/parse.generated.go' - gofmt -s -w 'interpolate/parse.generated.go' + sed -i -e 1,2d $@ + gofmt -s -w $@ fuzz: @$(MAKE) --quiet -C fuzz diff --git a/lang/interpolate/parse.rl b/lang/interpolate/parse.rl index 15a36edc..e006df4e 100644 --- a/lang/interpolate/parse.rl +++ b/lang/interpolate/parse.rl @@ -76,7 +76,7 @@ func Parse(data string) (out Stream, _ error) { var = '${' var_name '}' ; # Any special escape characters are matched here. - escaped_lit = '\\' ( 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | '\\' | '"' | '$' ) + escaped_lit = '\\' (any) @{ switch s := data[fpc:fpc+1]; s { case "a": @@ -105,23 +105,16 @@ func Parse(data string) (out Stream, _ error) { // x = "\x00" default: //x = s // in case we want to avoid erroring - // this is a programming (parser) error I think - return nil, fmt.Errorf("unhandled escape sequence token: %s", s) + return nil, fmt.Errorf("unknown escape sequence: \\%s", s) } l = Literal{Value: x} }; - # XXX: explicitly try and add this one? - #escape_lit = '\\\\' - #@{ - # l = Literal{Value: "\\\\"} - #}; - - # Anything followed by a '$' that is not a '{' is used as-is - # with the dollar. - dollar_lit = '$' (any - '{') + # A lone dollar is a literal, if it is not a var. The `token` rule + # declares a var match is attempted first, else a `lit` and thus this. + dollar_lit = '$' @{ - l = Literal{Value: data[fpc-1:fpc+1]} + l = Literal{Value: data[fpc:fpc+1]} }; # Literal strings that don't contain '$' or '\'. 
diff --git a/lang/interpret_test/TestAstFunc2/escaping1.output b/lang/interpret_test/TestAstFunc2/escaping1.output index 53fca49c..dd5a3486 100644 --- a/lang/interpret_test/TestAstFunc2/escaping1.output +++ b/lang/interpret_test/TestAstFunc2/escaping1.output @@ -19,3 +19,8 @@ Vertex: test[R: \This is r1 EOF] Vertex: test[S: \$ EOF] Vertex: test[T: newline EOF] +Vertex: test[U: tab \ tabEOF] +Vertex: test[W: \$] +Vertex: test[X: $This is x1 EOF] +Vertex: test[Y: ${unused} EOF] +Vertex: test[Z: $$$] diff --git a/lang/interpret_test/TestAstFunc2/escaping1/main.mcl b/lang/interpret_test/TestAstFunc2/escaping1/main.mcl index 87ff5475..dff9b054 100644 --- a/lang/interpret_test/TestAstFunc2/escaping1/main.mcl +++ b/lang/interpret_test/TestAstFunc2/escaping1/main.mcl @@ -46,23 +46,17 @@ test "S: \\$ EOF" {} test "T: newline\nEOF" {} -# XXX: possible bugs or misunderstood expectations: +test "U: tab\t\\\ttabEOF" {} -#test "W: \\$" {} -# got: -# exp: W: \$ +# test "V: " {} -#$x1 = "This is x1" -#test "X: $${x1} EOF" {} -# got: X: $${x1} EOF -# exp: X: $This is x1 EOF +test "W: \\$" {} -#$unused = "i am unused" -#$y1 = "{unused}" -#test "Y: $${y1} EOF" {} # check there isn't double parsing -# got: Y: $${y1} EOF -# exp: Y: ${unused} EOF +$x1 = "This is x1" +test "X: $${x1} EOF" {} -#test "Z: $$$" {} -# got: -# exp: Z: $$$ EOF +$unused = "i am unused" +$y1 = "{unused}" +test "Y: $${y1} EOF" {} # check there isn't double parsing + +test "Z: $$$" {} diff --git a/lang/interpret_test/TestAstFunc2/escaping4.output b/lang/interpret_test/TestAstFunc2/escaping4.output index 3315675b..a75274ec 100644 --- a/lang/interpret_test/TestAstFunc2/escaping4.output +++ b/lang/interpret_test/TestAstFunc2/escaping4.output @@ -1 +1 @@ -# err: errInterpolate: parser failed: cannot parse string: X: \z +# err: errInterpolate: parser failed: unknown escape sequence: \z diff --git a/lang/interpret_test/TestAstFunc2/escaping5.output b/lang/interpret_test/TestAstFunc2/escaping5.output index 
c2d5f06f..c81498b1 100644 --- a/lang/interpret_test/TestAstFunc2/escaping5.output +++ b/lang/interpret_test/TestAstFunc2/escaping5.output @@ -1 +1 @@ -# err: errInterpolate: parser failed: cannot parse string: X: there is no \j sequence +# err: errInterpolate: parser failed: unknown escape sequence: \j