lang: Allow matching underscores in some of the identifiers

This allows matching underscores in some of the identifiers, but not
when they're the last character.

This caused me to suffer a bit of pain tracking down a bug which turned
out to be in the lexer. It started with a failing test that I wrote in:

974c2498c4

and which was followed by a fix in:

52682f463a

Glad that's fixed!
This commit is contained in:
James Shubin
2018-09-22 10:37:29 -04:00
parent a26620da38
commit 57ce3fa587
4 changed files with 135 additions and 26 deletions

View File

@@ -289,7 +289,7 @@
panic(fmt.Sprintf("error lexing FLOAT, got: %v", err)) panic(fmt.Sprintf("error lexing FLOAT, got: %v", err))
} }
} }
/\$[a-z][a-z0-9]*{[0-9]+}/ /\$[a-z]+([a-z0-9_]*[a-z0-9]+)?{[0-9]+}/
{ {
// we have this as a single token, because otherwise the // we have this as a single token, because otherwise the
// parser can get confused by the curly brackets :/ // parser can get confused by the curly brackets :/
@@ -323,39 +323,39 @@
panic(fmt.Sprintf("error lexing VAR_IDENTIFIER_HX, got: %v", err)) panic(fmt.Sprintf("error lexing VAR_IDENTIFIER_HX, got: %v", err))
} }
} }
/\$[a-z][a-z0-9]*/ /\$[a-z]([a-z0-9_]*[a-z0-9]+)?/
{ {
// an alternate pattern: /\$[a-z](|[a-z0-9_]*[a-z0-9])/
yylex.pos(lval) // our pos yylex.pos(lval) // our pos
s := yylex.Text() s := yylex.Text()
lval.str = s[1:len(s)] // remove the leading $ lval.str = s[1:len(s)] // remove the leading $
return VAR_IDENTIFIER return VAR_IDENTIFIER
} }
/[A-Z][a-z0-9:]*[a-z0-9]*/ /[a-z]([a-z0-9_]*[a-z0-9]+)?/
{ {
yylex.pos(lval) // our pos yylex.pos(lval) // our pos
s := yylex.Text() lval.str = yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it return IDENTIFIER
return CAPITALIZED_RES_IDENTIFIER
} }
/[a-z][a-z0-9:]*[a-z0-9]*/ /[A-Z]([a-z0-9_]*[a-z0-9]+)?/
{
yylex.pos(lval) // our pos
s := yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it
return RES_IDENTIFIER
}
/[A-Z][a-z0-9]*/
{ {
yylex.pos(lval) // our pos yylex.pos(lval) // our pos
s := yylex.Text() s := yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it lval.str = strings.ToLower(s) // uncapitalize it
return CAPITALIZED_IDENTIFIER return CAPITALIZED_IDENTIFIER
} }
/[a-z][a-z0-9]*/ /[a-z]([a-z0-9:]*[a-z0-9]+)?/
{ {
yylex.pos(lval) // our pos yylex.pos(lval) // our pos
lval.str = yylex.Text() lval.str = yylex.Text()
return IDENTIFIER return RES_IDENTIFIER
}
/[A-Z]([a-z0-9:]*[a-z0-9]+)?/
{
yylex.pos(lval) // our pos
s := yylex.Text()
lval.str = strings.ToLower(s) // uncapitalize it
return CAPITALIZED_RES_IDENTIFIER
} }
/#[^\n]*/ /#[^\n]*/
{ // this matches a (#) pound char followed by any { // this matches a (#) pound char followed by any
@@ -405,3 +405,7 @@ import (
"fmt" "fmt"
"strconv" "strconv"
) )
// NOTE:
// Among rules in the same scope, the longest matching pattern takes precedence.
// In event of a tie, the first pattern wins.

View File

@@ -241,6 +241,34 @@ func TestLexParse0(t *testing.T) {
//exp: ???, // FIXME: add the expected AST //exp: ???, // FIXME: add the expected AST
}) })
} }
{
values = append(values, test{
name: "maps 1",
code: `
# make sure the "str:" part doesn't match a single ident
$strmap map{str: int} = {
"key1" => 42,
"key2" => -13,
}
`,
fail: false,
//exp: ???, // FIXME: add the expected AST
})
}
{
values = append(values, test{
name: "maps 2",
code: `
$mapstrintlist map{str: []int} = {
"key1" => [42, 44,],
"key2" => [],
"key3" => [-13,],
}
`,
fail: false,
//exp: ???, // FIXME: add the expected AST
})
}
{ {
values = append(values, test{ values = append(values, test{
name: "maps and lists", name: "maps and lists",
@@ -1268,6 +1296,52 @@ func TestLexParse0(t *testing.T) {
fail: true, fail: true,
}) })
} }
{
exp := &StmtProg{
Prog: []interfaces.Stmt{
&StmtClass{
Name: "x",
Args: []*Arg{},
Body: &StmtProg{
Prog: []interfaces.Stmt{},
},
},
&StmtClass{
Name: "y1",
Args: []*Arg{},
Body: &StmtProg{
Prog: []interfaces.Stmt{},
},
},
&StmtInclude{
Name: "z",
Args: nil,
},
},
}
values = append(values, test{
name: "simple class with args 0",
code: `
class x() {
}
class y1() {
}
include z
`,
fail: false,
exp: exp,
})
}
{
values = append(values, test{
name: "simple class underscore failure",
code: `
class x_() {
}
`,
fail: true,
})
}
{ {
exp := &StmtProg{ exp := &StmtProg{
Prog: []interfaces.Stmt{ Prog: []interfaces.Stmt{

View File

@@ -754,16 +754,17 @@ resource:
Contents: $4.resContents, Contents: $4.resContents,
} }
} }
// TODO: do we need to include this simpler case as well? // note: this is a simplified version of the above if the lexer picks it
//| IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY // note: must not include underscores, but that is checked after parsing
// { | IDENTIFIER expr OPEN_CURLY resource_body CLOSE_CURLY
// posLast(yylex, yyDollar) // our pos {
// $$.stmt = &StmtRes{ posLast(yylex, yyDollar) // our pos
// Kind: $1.str, $$.stmt = &StmtRes{
// Name: $2.expr, Kind: $1.str,
// Contents: $4.resContents, Name: $2.expr,
// } Contents: $4.resContents,
// } }
}
; ;
resource_body: resource_body:
/* end of list */ /* end of list */
@@ -885,6 +886,17 @@ edge_half:
//SendRecv: "", // unused //SendRecv: "", // unused
} }
} }
// note: this is a simplified version of the above if the lexer picks it
// note: must not include underscores, but that is checked after parsing
| CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK
{
posLast(yylex, yyDollar) // our pos
$$.edgeHalf = &StmtEdgeHalf{
Kind: $1.str,
Name: $3.expr,
//SendRecv: "", // unused
}
}
; ;
edge_half_sendrecv: edge_half_sendrecv:
// eg: Test["t1"].foo_send // eg: Test["t1"].foo_send
@@ -897,6 +909,17 @@ edge_half_sendrecv:
SendRecv: $6.str, SendRecv: $6.str,
} }
} }
// note: this is a simplified version of the above if the lexer picks it
// note: must not include underscores, but that is checked after parsing
| CAPITALIZED_IDENTIFIER OPEN_BRACK expr CLOSE_BRACK DOT IDENTIFIER
{
posLast(yylex, yyDollar) // our pos
$$.edgeHalf = &StmtEdgeHalf{
Kind: $1.str,
Name: $3.expr,
SendRecv: $6.str,
}
}
; ;
type: type:
BOOL_IDENTIFIER BOOL_IDENTIFIER

View File

@@ -164,6 +164,10 @@ func (obj *StmtRes) Apply(fn func(interfaces.Node) error) error {
// Init initializes this branch of the AST, and returns an error if it fails to // Init initializes this branch of the AST, and returns an error if it fails to
// validate. // validate.
func (obj *StmtRes) Init(data *interfaces.Data) error { func (obj *StmtRes) Init(data *interfaces.Data) error {
if strings.Contains(obj.Kind, "_") {
return fmt.Errorf("kind must not contain underscores")
}
obj.data = data obj.data = data
if err := obj.Name.Init(data); err != nil { if err := obj.Name.Init(data); err != nil {
return err return err
@@ -1058,6 +1062,10 @@ func (obj *StmtEdgeHalf) Apply(fn func(interfaces.Node) error) error {
// Init initializes this branch of the AST, and returns an error if it fails to // Init initializes this branch of the AST, and returns an error if it fails to
// validate. // validate.
func (obj *StmtEdgeHalf) Init(data *interfaces.Data) error { func (obj *StmtEdgeHalf) Init(data *interfaces.Data) error {
if strings.Contains(obj.Kind, "_") {
return fmt.Errorf("kind must not contain underscores")
}
return obj.Name.Init(data) return obj.Name.Init(data)
} }