diff --git a/lang/lexparse.go b/lang/lexparse.go
index c3e5d71c..486e0edf 100644
--- a/lang/lexparse.go
+++ b/lang/lexparse.go
@@ -18,10 +18,18 @@ package lang // TODO: move this into a sub package of lang/$name?
 
 import (
+	"bufio"
 	"fmt"
 	"io"
+	"path"
+	"sort"
+	"strings"
 
+	"github.com/purpleidea/mgmt/engine"
 	"github.com/purpleidea/mgmt/lang/interfaces"
+	"github.com/purpleidea/mgmt/util"
+
+	errwrap "github.com/pkg/errors"
 )
 
 // These constants represent the different possible lexer/parser errors.
@@ -42,6 +50,11 @@ type LexParseErr struct {
 	Str string
 	Row int // this is zero-indexed (the first line is 0)
 	Col int // this is zero-indexed (the first char is 0)
+
+	// Filename is the file that this error occurred in. If this is unknown,
+	// then it will be empty. This is not set when run by the basic LexParse
+	// function.
+	Filename string
 }
 
 // Error displays this error with all the relevant state information.
@@ -79,3 +92,132 @@ func LexParse(input io.Reader) (interfaces.Stmt, error) {
 	}
 	return lp.ast, nil
 }
+
+// LexParseWithOffsets takes an io.Reader input and a map of corresponding
+// offsets, and runs LexParse on the combined input. The input to this function
+// is most commonly the output of DirectoryReader, which returns a single
+// io.Reader (usually built from an io.MultiReader) and the matching offsets
+// map. If the offsets map is nil or empty, then this simply redirects to
+// LexParse. It differs from LexParse in that when it errors, it uses some
+// offset math to also report which file the error occurred in. The offsets are
+// meant to be in units of file size (bytes) and not length (lines).
+// FIXME: due to an implementation difficulty, offsets are currently in length!
+func LexParseWithOffsets(input io.Reader, offsets map[uint64]string) (interfaces.Stmt, error) {
+	if len(offsets) == 0 { // this also covers the nil map case
+		return LexParse(input) // special case, no named offsets...
+	}
+
+	stmt, err := LexParse(input)
+	if err == nil { // handle the success case first because it ends faster
+		return stmt, nil
+	}
+	e, ok := err.(*LexParseErr)
+	if !ok {
+		return nil, err // unexpected error format
+	}
+
+	// rebuild the error so it contains the right filename index, etc...
+
+	uints := []uint64{}
+	for i := range offsets {
+		uints = append(uints, i)
+	}
+	sort.Sort(util.UInt64Slice(uints))
+	if i := uints[0]; i != 0 { // first offset is supposed to be zero
+		return nil, fmt.Errorf("unexpected first offset of %d", i)
+	}
+
+	// TODO: switch this to an offset in bytes instead of lines
+	// TODO: we'll also need a way to convert that into the new row number!
+	row := uint64(e.Row)
+	var start uint64       // starting offset of the file the error is in
+	filename := offsets[0] // (assumption)
+	for _, i := range uints {
+		if row <= i { // the error is before this offset, so stop
+			break
+		}
+		start = i
+
+		// if we fall off the end of the loop, the last file is correct
+		filename = offsets[i]
+	}
+
+	return nil, &LexParseErr{
+		Err:      e.Err,            // same
+		Str:      e.Str,            // same
+		Row:      int(row - start), // row relative to the start of the file
+		Col:      e.Col,            // same
+		Filename: filename,         // actual filename
+	}
+}
+
+// DirectoryReader takes a filesystem and an absolute directory path, and it
+// returns a combined reader into that directory, along with an offset map of
+// the file contents. This is used to build a single reader from a directory
+// containing language source files, and as a result, it will skip over files
+// that don't have the correct extension. The offsets are meant to be in units
+// of file size (bytes) and not length (lines).
+// FIXME: due to an implementation difficulty, offsets are currently in length!
+func DirectoryReader(fs engine.Fs, dir string) (io.Reader, map[uint64]string, error) {
+	fis, err := fs.ReadDir(dir) // ([]os.FileInfo, error)
+	if err != nil {
+		return nil, nil, errwrap.Wrapf(err, "can't stat directory contents `%s`", dir)
+	}
+
+	var offset uint64
+	offsets := make(map[uint64]string) // cumulative offset to abs. filename
+	readers := []io.Reader{}
+
+	for _, fi := range fis {
+		if fi.IsDir() {
+			continue // skip directories
+		}
+		name := path.Join(dir, fi.Name()) // relative path made absolute
+		if !strings.HasSuffix(name, "."+FileNameExtension) {
+			continue // skip files with the wrong extension
+		}
+
+		f, err := fs.Open(name) // opens read-only
+		if err != nil {
+			return nil, nil, errwrap.Wrapf(err, "can't open file `%s`", name)
+		}
+		defer f.Close()
+		//stat, err := f.Stat() // (os.FileInfo, error)
+		//if err != nil {
+		//	return nil, nil, errwrap.Wrapf(err, "can't stat file `%s`", name)
+		//}
+
+		offsets[offset] = name // save cumulative offset (starts at 0)
+		//offset += uint64(stat.Size()) // the earlier stat causes file download
+
+		// TODO: store the offset in size instead of length! we're using
+		// length at the moment since it is not clear how easy it is for
+		// the lexer/parser to return the byte offset as well as line no
+		// NOTE: in addition, this scanning is not the fastest for perf!
+		scanner := bufio.NewScanner(f)
+		lines := 0
+		for scanner.Scan() { // count each line
+			lines++
+		}
+		if err := scanner.Err(); err != nil {
+			return nil, nil, errwrap.Wrapf(err, "can't scan file `%s`", name)
+		}
+		offset += uint64(lines)
+		if start, err := f.Seek(0, io.SeekStart); err != nil { // reset
+			return nil, nil, errwrap.Wrapf(err, "can't reset file `%s`", name)
+		} else if start != 0 { // we should be back at the start (0)
+			return nil, nil, fmt.Errorf("reset of file `%s` was %d", name, start)
+		}
+
+		readers = append(readers, f)
+	}
+	if len(offsets) == 0 {
+		// TODO: this condition should be validated during the deploy...
+		return nil, nil, fmt.Errorf("no files in main directory")
+	}
+
+	if len(offsets) == 1 { // no need for a multi reader
+		return readers[0], offsets, nil
+	}
+
+	return io.MultiReader(readers...), offsets, nil
+}
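The offset math in LexParseWithOffsets maps a row in the combined stream back to a (filename, local row) pair. Below is a minimal, self-contained sketch of that lookup, under the same line-based offsets assumption noted in the FIXME. The `lookup` helper is hypothetical and not part of this patch; it uses sort.Slice so it doesn't need util.UInt64Slice.

```go
package main

import (
	"fmt"
	"sort"
)

// lookup maps a zero-indexed row in the combined stream back to the file it
// came from, and to the row relative to that file's start. The offsets map is
// keyed by each file's cumulative starting line, like the map returned from
// DirectoryReader. (A standalone illustration, not the patch's exact loop.)
func lookup(offsets map[uint64]string, row uint64) (string, uint64) {
	uints := []uint64{}
	for i := range offsets {
		uints = append(uints, i)
	}
	sort.Slice(uints, func(a, b int) bool { return uints[a] < uints[b] })

	var start uint64       // starting line of the matched file
	filename := offsets[0] // the first offset is always zero
	for _, i := range uints {
		if row < i { // this offset starts a later file
			break
		}
		start = i
		filename = offsets[i]
	}
	return filename, row - start
}

func main() {
	offsets := map[uint64]string{0: "file1", 8: "file2", 18: "file3"}
	file, local := lookup(offsets, 13)
	fmt.Printf("%s @ row %d\n", file, local) // file2 @ row 5
}
```

With the offsets map used in the test below, a global row of 13 resolves to "file2" at local row 5, which is what TestLexParseWithOffsets1 expects.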
diff --git a/lang/lexparse_test.go b/lang/lexparse_test.go
index 7ff9fafe..a8ca3ff9 100644
--- a/lang/lexparse_test.go
+++ b/lang/lexparse_test.go
@@ -20,6 +20,7 @@ package lang
 
 import (
+	"io"
 	"reflect"
 	"strings"
 	"testing"
@@ -1519,3 +1520,61 @@ func TestLexParse2(t *testing.T) {
 		t.Logf("output: %+v", err)
 	}
 }
+
+func TestLexParseWithOffsets1(t *testing.T) {
+	code1 := `
+	# "file1"
+	$a = 42
+	$b = true
+	$c = 13
+	$d = "hello"
+	$e = true
+	$f = 3.13
+	`
+	code2 := `
+	# "file2"
+	# some noop resource
+	noop "n0" {
+		foo => true,
+		bar => false # this should be a parser error (no comma)
+	}
+	# hello
+	# world
+	test "t2" {}
+	`
+	code3 := `
+	# "file3"
+	# this is some more code
+	test "t3" {}
+	`
+	str1 := strings.NewReader(code1)
+	str2 := strings.NewReader(code2)
+	str3 := strings.NewReader(code3)
+	// TODO: this is currently in number of lines instead of bytes
+	o1 := uint64(len(strings.Split(code1, "\n")) - 1)
+	o2 := uint64(len(strings.Split(code2, "\n")) - 1)
+	//o1 := uint64(len(code1))
+	//o2 := uint64(len(code2))
+	t.Logf("o1: %+v", o1)
+	t.Logf("o2: %+v", o2)
+	t.Logf("o1+o2: %+v", o1+o2)
+	readers := io.MultiReader(str1, str2, str3)
+	offsets := map[uint64]string{
+		0:       "file1",
+		o1:      "file2",
+		o1 + o2: "file3", // offset is cumulative
+	}
+	_, err := LexParseWithOffsets(readers, offsets)
+	if e, ok := err.(*LexParseErr); ok && e.Err != ErrParseExpectingComma {
+		t.Errorf("lex/parse failure, got: %+v", e)
+	} else if err == nil {
+		t.Errorf("lex/parse success, expected error")
+	} else {
+		if e.Row != 5 || e.Col != 9 || e.Filename != "file2" {
+			t.Errorf("expected error in 'file2' @ 5 x 9, got: '%s' @ %d x %d", e.Filename, e.Row, e.Col)
+		}
+		t.Logf("file @ row x col: '%s' @ %d x %d", e.Filename, e.Row, e.Col)
+		t.Logf("message: %s", e.Str)
+		t.Logf("output: %+v", err) // this will be 1-indexed, instead of zero-indexed
+	}
+}
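DirectoryReader counts lines with a bufio.Scanner and then seeks each file back to its start, so the handles it collects are still fully readable by the eventual io.MultiReader. Here is a minimal sketch of that scan-and-reset pattern in isolation; it assumes any io.ReadSeeker will do, with a strings.Reader standing in for an engine.Fs file, and is illustrative rather than part of the patch.

```go
package main

import (
	"bufio"
	"fmt"
	"io"
	"strings"
)

// countLinesAndReset counts the lines in r by scanning it, then seeks back to
// the start so the same reader can be consumed again afterwards. This mirrors
// the scan-then-Seek(0, io.SeekStart) sequence in DirectoryReader.
func countLinesAndReset(r io.ReadSeeker) (uint64, error) {
	scanner := bufio.NewScanner(r) // splits on lines by default
	var lines uint64
	for scanner.Scan() {
		lines++
	}
	if err := scanner.Err(); err != nil {
		return 0, err
	}
	// rewind, otherwise the caller would see an already-consumed reader
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return 0, err
	}
	return lines, nil
}

func main() {
	r := strings.NewReader("a\nb\nc\n")
	n, err := countLinesAndReset(r)
	if err != nil {
		panic(err)
	}
	fmt.Println(n) // 3

	data, err := io.ReadAll(r) // the reset means nothing was lost
	if err != nil {
		panic(err)
	}
	fmt.Printf("%q\n", data) // "a\nb\nc\n"
}
```

The Seek(0, io.SeekStart) is the crucial step: without it, the scanner would have drained each file, and the combined reader handed back to the caller would appear empty.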