lang: Allow matching underscores in some of the identifiers

This allows matching underscores in some of the identifiers, but not
when an underscore is the last character.

This caused me to suffer a bit of pain tracking down a bug which turned
out to be in the lexer. It started with a failing test that I wrote in:

974c2498c4

and was followed by a fix in:

52682f463a

Glad that's fixed!
This commit is contained in:
James Shubin
2018-09-22 10:37:29 -04:00
parent a26620da38
commit 57ce3fa587
4 changed files with 135 additions and 26 deletions

View File

@@ -289,7 +289,7 @@
panic(fmt.Sprintf("error lexing FLOAT, got: %v", err))
}
}
/\$[a-z][a-z0-9]*{[0-9]+}/
/\$[a-z]+([a-z0-9_]*[a-z0-9]+)?{[0-9]+}/
{
// we have this as a single token, because otherwise the
// parser can get confused by the curly brackets :/
@@ -323,39 +323,39 @@
panic(fmt.Sprintf("error lexing VAR_IDENTIFIER_HX, got: %v", err))
}
}
/\$[a-z][a-z0-9]*/
/\$[a-z]([a-z0-9_]*[a-z0-9]+)?/
{
// an alternate pattern: /\$[a-z](|[a-z0-9_]*[a-z0-9])/
yylex.pos(lval) // our pos
s := yylex.Text()
lval.str = s[1:len(s)] // remove the leading $
return VAR_IDENTIFIER
}
/[A-Z][a-z0-9:]*[a-z0-9]*/
/[a-z]([a-z0-9_]*[a-z0-9]+)?/
{
yylex.pos(lval) // our pos
s := yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it
return CAPITALIZED_RES_IDENTIFIER
lval.str = yylex.Text()
return IDENTIFIER
}
/[a-z][a-z0-9:]*[a-z0-9]*/
{
yylex.pos(lval) // our pos
s := yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it
return RES_IDENTIFIER
}
/[A-Z][a-z0-9]*/
/[A-Z]([a-z0-9_]*[a-z0-9]+)?/
{
yylex.pos(lval) // our pos
s := yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it
return CAPITALIZED_IDENTIFIER
}
/[a-z][a-z0-9]*/
/[a-z]([a-z0-9:]*[a-z0-9]+)?/
{
yylex.pos(lval) // our pos
lval.str = yylex.Text()
return IDENTIFIER
return RES_IDENTIFIER
}
/[A-Z]([a-z0-9:]*[a-z0-9]+)?/
{
yylex.pos(lval) // our pos
s := yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it
return CAPITALIZED_RES_IDENTIFIER
}
/#[^\n]*/
{ // this matches a (#) pound char followed by any
@@ -405,3 +405,7 @@ import (
"fmt"
"strconv"
)
// NOTE:
// Among rules in the same scope, the longest matching pattern takes precedence.
// In the event of a tie, the first pattern wins.

View File

@@ -241,6 +241,34 @@ func TestLexParse0(t *testing.T) {
//exp: ???, // FIXME: add the expected AST
})
}
{
values = append(values, test{
name: "maps 1",
code: `
# make sure the "str:" part doesn't match a single ident
$strmap map{str: int} = {
"key1" => 42,
"key2" => -13,
}
`,
fail: false,
//exp: ???, // FIXME: add the expected AST
})
}
{
values = append(values, test{
name: "maps 2",
code: `
$mapstrintlist map{str: []int} = {
"key1" => [42, 44,],
"key2" => [],
"key3" => [-13,],
}
`,
fail: false,
//exp: ???, // FIXME: add the expected AST
})
}
{
values = append(values, test{
name: "maps and lists",
@@ -1268,6 +1296,52 @@ func TestLexParse0(t *testing.T) {
fail: true,
})
}
{
exp := &StmtProg{
Prog: []interfaces.Stmt{
&StmtClass{
Name: "x",
Args: []*Arg{},
Body: &StmtProg{
Prog: []interfaces.Stmt{},
},
},
&StmtClass{
Name: "y1",
Args: []*Arg{},
Body: &StmtProg{
Prog: []interfaces.Stmt{},
},
},
&StmtInclude{
Name: "z",
Args: nil,
},
},
}
values = append(values, test{
name: "simple class with args 0",
code: `
class x() {
}
class y1() {
}
include z
`,
fail: false,
exp: exp,
})
}
{
values = append(values, test{
name: "simple class underscore failure",
code: `
class x_() {
}
`,
fail: true,
})
}
{
exp := &StmtProg{
Prog: []interfaces.Stmt{

View File

@@ -754,16 +754,17 @@ resource:
Contents: $4.resContents,
}
}
// TODO: do we need to include this simpler case as well?
//| IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY
// {
// posLast(yylex, yyDollar) // our pos
// $$.stmt = &StmtRes{
// Kind: $1.str,
// Name: $2.expr,
// Contents: $4.resContents,
// }
// }
// note: this is a simplified version of the above if the lexer picks it
// note: must not include underscores, but that is checked after parsing
| IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY
{
posLast(yylex, yyDollar) // our pos
$$.stmt = &StmtRes{
Kind: $1.str,
Name: $2.expr,
Contents: $4.resContents,
}
}
;
resource_body:
/* end of list */
@@ -885,6 +886,17 @@ edge_half:
//SendRecv: "", // unused
}
}
// note: this is a simplified version of the above if the lexer picks it
// note: must not include underscores, but that is checked after parsing
| CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK
{
posLast(yylex, yyDollar) // our pos
$$.edgeHalf = &StmtEdgeHalf{
Kind: $1.str,
Name: $3.expr,
//SendRecv: "", // unused
}
}
;
edge_half_sendrecv:
// eg: Test["t1"].foo_send
@@ -897,6 +909,17 @@ edge_half_sendrecv:
SendRecv: $6.str,
}
}
// note: this is a simplified version of the above if the lexer picks it
// note: must not include underscores, but that is checked after parsing
| CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK DOT IDENTIFIER
{
posLast(yylex, yyDollar) // our pos
$$.edgeHalf = &StmtEdgeHalf{
Kind: $1.str,
Name: $3.expr,
SendRecv: $6.str,
}
}
;
type:
BOOL_IDENTIFIER

View File

@@ -164,6 +164,10 @@ func (obj *StmtRes) Apply(fn func(interfaces.Node) error) error {
// Init initializes this branch of the AST, and returns an error if it fails to
// validate.
func (obj *StmtRes) Init(data *interfaces.Data) error {
if strings.Contains(obj.Kind, "_") {
return fmt.Errorf("kind must not contain underscores")
}
obj.data = data
if err := obj.Name.Init(data); err != nil {
return err
@@ -1058,6 +1062,10 @@ func (obj *StmtEdgeHalf) Apply(fn func(interfaces.Node) error) error {
// Init validates this edge half and then initializes its Name expression. It
// returns an error if the Kind contains an underscore, or if initializing the
// Name fails.
func (obj *StmtEdgeHalf) Init(data *interfaces.Data) error {
	// The kind is validated here, after parsing, before Name is touched.
	if strings.ContainsRune(obj.Kind, '_') {
		return fmt.Errorf("kind must not contain underscores")
	}

	err := obj.Name.Init(data)
	return err
}