lang: Allow matching underscores in some of the identifiers

This allows matching underscores in some of the identifiers, but not when they're the last character. This caused me to suffer a bit of pain tracking down a bug which turned out to be in the lexer. It started with a failing test that I wrote in 974c2498c4, which was followed by a fix in 52682f463a. Glad that's fixed!
2018-09-22 10:37:29 -04:00
parent a26620da38
commit 57ce3fa587
4 changed files with 135 additions and 26 deletions
--- a/lang/lexer.nex
+++ b/lang/lexer.nex
@@ -289,7 +289,7 @@
 				panic(fmt.Sprintf("error lexing FLOAT, got: %v", err))
 			}
 		}
-/\$[a-z][a-z0-9]*{[0-9]+}/
+/\$[a-z]+([a-z0-9_]*[a-z0-9]+)?{[0-9]+}/
 		{
 			// we have this as a single token, because otherwise the
 			// parser can get confused by the curly brackets :/
@@ -323,39 +323,39 @@
 				panic(fmt.Sprintf("error lexing VAR_IDENTIFIER_HX, got: %v", err))
 			}
 		}
-/\$[a-z][a-z0-9]*/
+/\$[a-z]([a-z0-9_]*[a-z0-9]+)?/
 		{
 			// an alternate pattern: /\$[a-z](|[a-z0-9_]*[a-z0-9])/
 			yylex.pos(lval) // our pos
 			s := yylex.Text()
 			lval.str = s[1:len(s)] // remove the leading $
 			return VAR_IDENTIFIER
 		}
-/[A-Z][a-z0-9:]*[a-z0-9]*/
+/[a-z]([a-z0-9_]*[a-z0-9]+)?/
 		{
 			yylex.pos(lval) // our pos
-			s := yylex.Text()
+			lval.str = yylex.Text()
-			lval.str = strings.ToLower(s) // uncapitalize it
+			return IDENTIFIER
 			return CAPITALIZED_RES_IDENTIFIER
 		}
-/[a-z][a-z0-9:]*[a-z0-9]*/
+/[A-Z]([a-z0-9_]*[a-z0-9]+)?/
 		{
 			yylex.pos(lval) // our pos
 			s := yylex.Text()
 			lval.str = strings.ToLower(s) // uncapitalize it
 			return RES_IDENTIFIER
 		}
 /[A-Z][a-z0-9]*/
 		{
 			yylex.pos(lval) // our pos
 			s := yylex.Text()
 			lval.str = strings.ToLower(s) // uncapitalize it
 			return CAPITALIZED_IDENTIFIER
 		}
-/[a-z][a-z0-9]*/
+/[a-z]([a-z0-9:]*[a-z0-9]+)?/
 		{
 			yylex.pos(lval) // our pos
 			lval.str = yylex.Text()
-			return IDENTIFIER
+			return RES_IDENTIFIER
 		}
 /[A-Z]([a-z0-9:]*[a-z0-9]+)?/
 		{
 			yylex.pos(lval) // our pos
 			s := yylex.Text()
 			lval.str = strings.ToLower(s) // uncapitalize it
 			return CAPITALIZED_RES_IDENTIFIER
 		}
 /#[^\n]*/
 		{	// this matches a (#) pound char followed by any
@@ -405,3 +405,7 @@ import (
 	"fmt"
 	"strconv"
 )
 // NOTE:
 // Among rules in the same scope, the longest matching pattern takes precedence.
 // In event of a tie, the first pattern wins.
--- a/lang/lexparse_test.go
+++ b/lang/lexparse_test.go
@@ -241,6 +241,34 @@ func TestLexParse0(t *testing.T) {
 			//exp: ???, // FIXME: add the expected AST
 		})
 	}
 	{
 		values = append(values, test{
 			name: "maps 1",
 			code: `
 			# make sure the "str:" part doesn't match a single ident
 			$strmap map{str: int} = {
 				"key1" => 42,
 				"key2" => -13,
 			}
 			`,
 			fail: false,
 			//exp: ???, // FIXME: add the expected AST
 		})
 	}
 	{
 		values = append(values, test{
 			name: "maps 2",
 			code: `
 			$mapstrintlist map{str: []int} = {
 				"key1" => [42, 44,],
 				"key2" => [],
 				"key3" => [-13,],
 			}
 			`,
 			fail: false,
 			//exp: ???, // FIXME: add the expected AST
 		})
 	}
 	{
 		values = append(values, test{
 			name: "maps and lists",
@@ -1268,6 +1296,52 @@ func TestLexParse0(t *testing.T) {
 			fail: true,
 		})
 	}
 	{
 		exp := &StmtProg{
 			Prog: []interfaces.Stmt{
 				&StmtClass{
 					Name: "x",
 					Args: []*Arg{},
 					Body: &StmtProg{
 						Prog: []interfaces.Stmt{},
 					},
 				},
 				&StmtClass{
 					Name: "y1",
 					Args: []*Arg{},
 					Body: &StmtProg{
 						Prog: []interfaces.Stmt{},
 					},
 				},
 				&StmtInclude{
 					Name: "z",
 					Args: nil,
 				},
 			},
 		}
 		values = append(values, test{
 			name: "simple class with args 0",
 			code: `
 			class x() {
 			}
 			class y1() {
 			}
 			include z
 			`,
 			fail: false,
 			exp:  exp,
 		})
 	}
 	{
 		values = append(values, test{
 			name: "simple class underscore failure",
 			code: `
 			class x_() {
 			}
 			`,
 			fail: true,
 		})
 	}
 	{
 		exp := &StmtProg{
 			Prog: []interfaces.Stmt{
--- a/lang/parser.y
+++ b/lang/parser.y
@@ -754,16 +754,17 @@ resource:
 			Contents: $4.resContents,
 		}
 	}
-// TODO: do we need to include this simpler case as well?
+	// note: this is a simplified version of the above if the lexer picks it
-//|	IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY
+	// note: must not include underscores, but that is checked after parsing
-//	{
+|	IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY
-//		posLast(yylex, yyDollar) // our pos
+	{
-//		$$.stmt = &StmtRes{
+		posLast(yylex, yyDollar) // our pos
-//			Kind:     $1.str,
+		$$.stmt = &StmtRes{
-//			Name:     $2.expr,
+			Kind:     $1.str,
-//			Contents: $4.resContents,
+			Name:     $2.expr,
-//		}
+			Contents: $4.resContents,
-//	}
+		}
 	}
 ;
 resource_body:
 	/* end of list */
@@ -885,6 +886,17 @@ edge_half:
 			//SendRecv: "", // unused
 		}
 	}
 	// note: this is a simplified version of the above if the lexer picks it
 	// note: must not include underscores, but that is checked after parsing
 |	CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK
 	{
 		posLast(yylex, yyDollar) // our pos
 		$$.edgeHalf = &StmtEdgeHalf{
 			Kind: $1.str,
 			Name: $3.expr,
 			//SendRecv: "", // unused
 		}
 	}
 ;
 edge_half_sendrecv:
 	// eg: Test["t1"].foo_send
@@ -897,6 +909,17 @@ edge_half_sendrecv:
 			SendRecv: $6.str,
 		}
 	}
 	// note: this is a simplified version of the above if the lexer picks it
 	// note: must not include underscores, but that is checked after parsing
 |	CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK DOT IDENTIFIER
 	{
 		posLast(yylex, yyDollar) // our pos
 		$$.edgeHalf = &StmtEdgeHalf{
 			Kind: $1.str,
 			Name: $3.expr,
 			SendRecv: $6.str,
 		}
 	}
 ;
 type:
 	BOOL_IDENTIFIER
--- a/lang/structs.go
+++ b/lang/structs.go
@@ -164,6 +164,10 @@ func (obj *StmtRes) Apply(fn func(interfaces.Node) error) error {
 // Init initializes this branch of the AST, and returns an error if it fails to
 // validate.
 func (obj *StmtRes) Init(data *interfaces.Data) error {
 	if strings.Contains(obj.Kind, "_") {
 		return fmt.Errorf("kind must not contain underscores")
 	}
 	obj.data = data
 	if err := obj.Name.Init(data); err != nil {
 		return err
@@ -1058,6 +1062,10 @@ func (obj *StmtEdgeHalf) Apply(fn func(interfaces.Node) error) error {
 // Init initializes this branch of the AST, and returns an error if it fails to
 // validate. It fails if Kind contains an underscore; otherwise it returns
 // whatever initializing Name with the same data returns.
 func (obj *StmtEdgeHalf) Init(data *interfaces.Data) error {
 	// The kind of an edge half must not contain underscores. The grammar
 	// notes that this restriction is checked after parsing, so it is
 	// enforced here before Name is initialized.
 	if strings.Contains(obj.Kind, "_") {
 		return fmt.Errorf("kind must not contain underscores")
 	}
 	// The remaining validation is delegated to the Name node's own Init.
 	return obj.Name.Init(data)
 }