From 57ce3fa587897d74634c1216af67dd42252c64e5 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Sat, 22 Sep 2018 10:37:29 -0400 Subject: [PATCH] lang: Allow matching underscores in some of the identifiers This allows matching underscores in some of the identifiers, but not when they're the last character. This caused me to suffer a bit of pain tracking down a bug which turned out to be in the lexer. It started with a failing test that I wrote in: https://github.com/blynn/nex/commit/974c2498c46eaf4fb3963cfc059bcefd66ea48a8 and which was followed by a fix in: https://github.com/blynn/nex/commit/52682f463a45fdc37630e650d82f126a32d810a3 Glad that's fixed! --- lang/lexer.nex | 36 +++++++++++---------- lang/lexparse_test.go | 74 +++++++++++++++++++++++++++++++++++++++++++ lang/parser.y | 43 +++++++++++++++++++------ lang/structs.go | 8 +++++ 4 files changed, 135 insertions(+), 26 deletions(-) diff --git a/lang/lexer.nex b/lang/lexer.nex index ba26e671..3b6e9753 100644 --- a/lang/lexer.nex +++ b/lang/lexer.nex @@ -289,7 +289,7 @@ panic(fmt.Sprintf("error lexing FLOAT, got: %v", err)) } } -/\$[a-z][a-z0-9]*{[0-9]+}/ +/\$[a-z]+([a-z0-9_]*[a-z0-9]+)?{[0-9]+}/ { // we have this as a single token, because otherwise the // parser can get confused by the curly brackets :/ @@ -323,39 +323,39 @@ panic(fmt.Sprintf("error lexing VAR_IDENTIFIER_HX, got: %v", err)) } } -/\$[a-z][a-z0-9]*/ +/\$[a-z]([a-z0-9_]*[a-z0-9]+)?/ { + // an alternate pattern: /\$[a-z](|[a-z0-9_]*[a-z0-9])/ yylex.pos(lval) // our pos s := yylex.Text() lval.str = s[1:len(s)] // remove the leading $ return VAR_IDENTIFIER } -/[A-Z][a-z0-9:]*[a-z0-9]*/ +/[a-z]([a-z0-9_]*[a-z0-9]+)?/ { yylex.pos(lval) // our pos - s := yylex.Text() - lval.str = strings.ToLower(s) // uncapitalize it - return CAPITALIZED_RES_IDENTIFIER + lval.str = yylex.Text() + return IDENTIFIER } -/[a-z][a-z0-9:]*[a-z0-9]*/ - { - yylex.pos(lval) // our pos - s := yylex.Text() - lval.str = strings.ToLower(s) // uncapitalize it - return RES_IDENTIFIER 
- } -/[A-Z][a-z0-9]*/ +/[A-Z]([a-z0-9_]*[a-z0-9]+)?/ { yylex.pos(lval) // our pos s := yylex.Text() lval.str = strings.ToLower(s) // uncapitalize it return CAPITALIZED_IDENTIFIER } -/[a-z][a-z0-9]*/ +/[a-z]([a-z0-9:]*[a-z0-9]+)?/ { yylex.pos(lval) // our pos lval.str = yylex.Text() - return IDENTIFIER + return RES_IDENTIFIER + } +/[A-Z]([a-z0-9:]*[a-z0-9]+)?/ + { + yylex.pos(lval) // our pos + s := yylex.Text() + lval.str = strings.ToLower(s) // uncapitalize it + return CAPITALIZED_RES_IDENTIFIER } /#[^\n]*/ { // this matches a (#) pound char followed by any @@ -405,3 +405,7 @@ import ( "fmt" "strconv" ) + +// NOTE: +// Among rules in the same scope, the longest matching pattern takes precedence. +// In event of a tie, the first pattern wins. diff --git a/lang/lexparse_test.go b/lang/lexparse_test.go index 27474a0e..5aecfd63 100644 --- a/lang/lexparse_test.go +++ b/lang/lexparse_test.go @@ -241,6 +241,34 @@ func TestLexParse0(t *testing.T) { //exp: ???, // FIXME: add the expected AST }) } + { + values = append(values, test{ + name: "maps 1", + code: ` + # make sure the "str:" part doesn't match a single ident + $strmap map{str: int} = { + "key1" => 42, + "key2" => -13, + } + `, + fail: false, + //exp: ???, // FIXME: add the expected AST + }) + } + { + values = append(values, test{ + name: "maps 2", + code: ` + $mapstrintlist map{str: []int} = { + "key1" => [42, 44,], + "key2" => [], + "key3" => [-13,], + } + `, + fail: false, + //exp: ???, // FIXME: add the expected AST + }) + } { values = append(values, test{ name: "maps and lists", @@ -1268,6 +1296,52 @@ func TestLexParse0(t *testing.T) { fail: true, }) } + { + exp := &StmtProg{ + Prog: []interfaces.Stmt{ + &StmtClass{ + Name: "x", + Args: []*Arg{}, + Body: &StmtProg{ + Prog: []interfaces.Stmt{}, + }, + }, + &StmtClass{ + Name: "y1", + Args: []*Arg{}, + Body: &StmtProg{ + Prog: []interfaces.Stmt{}, + }, + }, + &StmtInclude{ + Name: "z", + Args: nil, + }, + }, + } + values = append(values, test{ + name: "simple 
class with args 0", + code: ` + class x() { + } + class y1() { + } + include z + `, + fail: false, + exp: exp, + }) + } + { + values = append(values, test{ + name: "simple class underscore failure", + code: ` + class x_() { + } + `, + fail: true, + }) + } { exp := &StmtProg{ Prog: []interfaces.Stmt{ diff --git a/lang/parser.y b/lang/parser.y index 6bb6237d..80c3efdb 100644 --- a/lang/parser.y +++ b/lang/parser.y @@ -754,16 +754,17 @@ resource: Contents: $4.resContents, } } -// TODO: do we need to include this simpler case as well? -//| IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY -// { -// posLast(yylex, yyDollar) // our pos -// $$.stmt = &StmtRes{ -// Kind: $1.str, -// Name: $2.expr, -// Contents: $4.resContents, -// } -// } + // note: this is a simplified version of the above if the lexer picks it + // note: must not include underscores, but that is checked after parsing +| IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY + { + posLast(yylex, yyDollar) // our pos + $$.stmt = &StmtRes{ + Kind: $1.str, + Name: $2.expr, + Contents: $4.resContents, + } + } ; resource_body: /* end of list */ @@ -885,6 +886,17 @@ edge_half: //SendRecv: "", // unused } } + // note: this is a simplified version of the above if the lexer picks it + // note: must not include underscores, but that is checked after parsing +| CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK + { + posLast(yylex, yyDollar) // our pos + $$.edgeHalf = &StmtEdgeHalf{ + Kind: $1.str, + Name: $3.expr, + //SendRecv: "", // unused + } + } ; edge_half_sendrecv: // eg: Test["t1"].foo_send @@ -897,6 +909,17 @@ edge_half_sendrecv: SendRecv: $6.str, } } + // note: this is a simplified version of the above if the lexer picks it + // note: must not include underscores, but that is checked after parsing +| CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK DOT IDENTIFIER + { + posLast(yylex, yyDollar) // our pos + $$.edgeHalf = &StmtEdgeHalf{ + Kind: $1.str, + Name: $3.expr, + SendRecv: $6.str, + } + } ; type: 
BOOL_IDENTIFIER diff --git a/lang/structs.go b/lang/structs.go index 22fae189..c35790e3 100644 --- a/lang/structs.go +++ b/lang/structs.go @@ -164,6 +164,10 @@ func (obj *StmtRes) Apply(fn func(interfaces.Node) error) error { // Init initializes this branch of the AST, and returns an error if it fails to // validate. func (obj *StmtRes) Init(data *interfaces.Data) error { + if strings.Contains(obj.Kind, "_") { + return fmt.Errorf("kind must not contain underscores") + } + obj.data = data if err := obj.Name.Init(data); err != nil { return err @@ -1058,6 +1062,10 @@ func (obj *StmtEdgeHalf) Apply(fn func(interfaces.Node) error) error { // Init initializes this branch of the AST, and returns an error if it fails to // validate. func (obj *StmtEdgeHalf) Init(data *interfaces.Data) error { + if strings.Contains(obj.Kind, "_") { + return fmt.Errorf("kind must not contain underscores") + } + return obj.Name.Init(data) }