lang: Support lexing and parsing a list of files with offsets

This adds a LexParseWithOffsets function that also takes a map of offsets
to be used if our input stream is composed of multiple io.Readers
combined together.

At the moment the offsets are based on line count instead of file size.
I think the latter would be preferable, but it seems much more difficult
to implement, as it probably requires support in the lexer and parser.
That improved solution would probably be faster, and more correct in the
case where someone passes in a file without a trailing newline.
James Shubin
2018-07-03 21:02:45 -04:00
parent a287f028d1
commit c8e9a100a6
2 changed files with 201 additions and 0 deletions
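
The two new pieces are designed to be used in sequence: DirectoryReader builds the combined reader plus the offsets map, and LexParseWithOffsets consumes both. Here is a minimal sketch of that call chain, assuming it lives in the same lang package as the code below; the loadDirectory helper is illustrative, not part of this commit:

// loadDirectory is a hypothetical helper showing the intended call chain.
func loadDirectory(fs engine.Fs, dir string) (interfaces.Stmt, error) {
	reader, offsets, err := DirectoryReader(fs, dir)
	if err != nil {
		return nil, err
	}
	// on failure, the *LexParseErr returned from here also carries the
	// Filename that its Row/Col coordinates are relative to
	return LexParseWithOffsets(reader, offsets)
}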

@@ -18,10 +18,18 @@
package lang // TODO: move this into a sub package of lang/$name?

import (
	"bufio"
	"fmt"
	"io"
	"path"
	"sort"
	"strings"

	"github.com/purpleidea/mgmt/engine"
	"github.com/purpleidea/mgmt/lang/interfaces"
	"github.com/purpleidea/mgmt/util"

	errwrap "github.com/pkg/errors"
)

// These constants represent the different possible lexer/parser errors.
@@ -42,6 +50,11 @@ type LexParseErr struct {
	Str string
	Row int // this is zero-indexed (the first line is 0)
	Col int // this is zero-indexed (the first char is 0)

	// Filename is the file that this error occurred in. If this is unknown,
	// then it will be empty. This is not set when run by the basic LexParse
	// function.
	Filename string
}

// Error displays this error with all the relevant state information.
@@ -79,3 +92,132 @@ func LexParse(input io.Reader) (interfaces.Stmt, error) {
	}
	return lp.ast, nil
}

// LexParseWithOffsets takes an io.Reader input and a map of corresponding
// offsets, and runs LexParse on them. The input to this function is most
// commonly the output from DirectoryReader, which returns a single io.Reader
// (usually a combined io.MultiReader) and the offsets map. If the offsets map
// is nil or empty, then this simply redirects directly to LexParse. It differs
// from LexParse in that when it errors, it will also report the corresponding
// file the error occurred in, based on some offset math. The offsets are in
// units of file size (bytes) and not length (lines).
// FIXME: due to an implementation difficulty, offsets are currently in length!
func LexParseWithOffsets(input io.Reader, offsets map[uint64]string) (interfaces.Stmt, error) {
	if len(offsets) == 0 { // this also catches a nil map
		return LexParse(input) // special case, no named offsets...
	}

	stmt, err := LexParse(input)
	if err == nil { // handle the success case first because it ends faster
		return stmt, nil
	}
	e, ok := err.(*LexParseErr)
	if !ok {
		return nil, err // unexpected error format
	}

	// rebuild the error so it contains the right filename index, etc...
	uints := []uint64{}
	for i := range offsets {
		uints = append(uints, i)
	}
	sort.Sort(util.UInt64Slice(uints))
	if i := uints[0]; i != 0 { // first offset is supposed to be zero
		return nil, fmt.Errorf("unexpected first offset of %d", i)
	}

	// TODO: switch this to an offset in bytes instead of lines
	// TODO: we'll also need a way to convert that into the new row number!
	row := uint64(e.Row)
	var base uint64        // offset of the file that contains the error
	filename := offsets[0] // (assumption)
	for _, i := range uints {
		if row < i { // the error comes before this file starts
			break
		}
		base = i
		// if we fall off the end of the loop, the last file is correct
		filename = offsets[i]
	}

	return nil, &LexParseErr{
		Err:      e.Err,           // same
		Str:      e.Str,           // same
		Row:      int(row - base), // row relative to the start of its file
		Col:      e.Col,           // same
		Filename: filename,        // actual filename
	}
}

// DirectoryReader takes a filesystem and an absolute directory path, and
// returns a combined reader into that directory, along with an offset map of
// the file contents. This is used to build a reader from a directory
// containing language source files, and as a result, it will skip over files
// that don't have the correct extension. The offsets are in units of file size
// (bytes) and not length (lines).
// FIXME: due to an implementation difficulty, offsets are currently in length!
func DirectoryReader(fs engine.Fs, dir string) (io.Reader, map[uint64]string, error) {
	fis, err := fs.ReadDir(dir) // ([]os.FileInfo, error)
	if err != nil {
		return nil, nil, errwrap.Wrapf(err, "can't stat directory contents `%s`", dir)
	}

	var offset uint64
	offsets := make(map[uint64]string) // cumulative offset to abs. filename
	readers := []io.Reader{}

	for _, fi := range fis {
		if fi.IsDir() {
			continue // skip directories
		}
		name := path.Join(dir, fi.Name()) // relative path made absolute
		if !strings.HasSuffix(name, "."+FileNameExtension) {
			continue
		}

		f, err := fs.Open(name) // opens read-only
		if err != nil {
			return nil, nil, errwrap.Wrapf(err, "can't open file `%s`", name)
		}
		defer f.Close()

		//stat, err := f.Stat() // (os.FileInfo, error)
		//if err != nil {
		//	return nil, nil, errwrap.Wrapf(err, "can't stat file `%s`", name)
		//}

		offsets[offset] = name // save cumulative offset (starts at 0)
		//offset += uint64(stat.Size()) // the earlier stat causes file download

		// TODO: store the offset in size instead of length! we're using
		// length at the moment since it is not clear how easy it is for
		// the lexer/parser to return the byte offset as well as line no
		// NOTE: in addition, this scanning is not the fastest for perf!
		scanner := bufio.NewScanner(f)
		lines := 0
		for scanner.Scan() { // each line
			lines++
		}
		if err := scanner.Err(); err != nil {
			return nil, nil, errwrap.Wrapf(err, "can't scan file `%s`", name)
		}
		offset += uint64(lines)

		if start, err := f.Seek(0, io.SeekStart); err != nil { // reset
			return nil, nil, errwrap.Wrapf(err, "can't reset file `%s`", name)
		} else if start != 0 { // we should be at the start (0)
			return nil, nil, fmt.Errorf("reset of file `%s` was %d", name, start)
		}

		readers = append(readers, f)
	}
	if len(offsets) == 0 {
		// TODO: this condition should be validated during the deploy...
		return nil, nil, fmt.Errorf("no files in main directory")
	}

	if len(offsets) == 1 { // no need for a multi reader
		return readers[0], offsets, nil
	}

	return io.MultiReader(readers...), offsets, nil
}
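
To make the offset math concrete, here is a small self-contained sketch that replays the filename and row lookup with the same line counts the test diff below produces: file1 contributes 8 lines and file2 contributes 10, so the cumulative offsets are 0, 8 and 18, and a global error row of 13 lands in file2 at local row 5.

package main

import "fmt"

func main() {
	// cumulative line offsets, as DirectoryReader would build them
	offsets := map[uint64]string{0: "file1", 8: "file2", 18: "file3"}
	uints := []uint64{0, 8, 18} // the sorted offset keys

	row := uint64(13) // global row of a parse error (zero-indexed)
	var base uint64
	filename := offsets[0]
	for _, i := range uints {
		if row < i { // the error comes before this file starts
			break
		}
		base = i
		filename = offsets[i]
	}
	fmt.Printf("%s @ row %d\n", filename, row-base) // prints: file2 @ row 5
}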

@@ -20,6 +20,7 @@
package lang

import (
	"io"
	"reflect"
	"strings"
	"testing"
@@ -1519,3 +1520,61 @@ func TestLexParse2(t *testing.T) {
		t.Logf("output: %+v", err)
	}
}

func TestLexParseWithOffsets1(t *testing.T) {
	code1 := `
# "file1"
$a = 42
$b = true
$c = 13
$d = "hello"
$e = true
$f = 3.13
`
	code2 := `
# "file2"
# some noop resource
noop "n0" {
foo => true,
bar => false # this should be a parser error (no comma)
}
# hello
# world
test "t2" {}
`
	code3 := `
# "file3"
# this is some more code
test "t3" {}
`
	str1 := strings.NewReader(code1)
	str2 := strings.NewReader(code2)
	str3 := strings.NewReader(code3)

	// TODO: this is currently in number of lines instead of bytes
	o1 := uint64(len(strings.Split(code1, "\n")) - 1)
	o2 := uint64(len(strings.Split(code2, "\n")) - 1)
	//o1 := uint64(len(code1))
	//o2 := uint64(len(code2))
	t.Logf("o1: %+v", o1)
	t.Logf("o2: %+v", o2)
	t.Logf("o1+o2: %+v", o1+o2)

	readers := io.MultiReader(str1, str2, str3)
	offsets := map[uint64]string{
		0:       "file1",
		o1:      "file2",
		o1 + o2: "file3", // offset is cumulative
	}
	_, err := LexParseWithOffsets(readers, offsets)
	if e, ok := err.(*LexParseErr); ok && e.Err != ErrParseExpectingComma {
		t.Errorf("lex/parse failure, got: %+v", e)
	} else if err == nil {
		t.Errorf("lex/parse success, expected error")
	} else {
		if e.Row != 5 || e.Col != 9 || e.Filename != "file2" {
			t.Errorf("expected error in 'file2' @ 5 x 9, got: '%s' @ %d x %d", e.Filename, e.Row, e.Col)
		}
		t.Logf("file @ row x col: '%s' @ %d x %d", e.Filename, e.Row, e.Col)
		t.Logf("message: %s", e.Str)
		t.Logf("output: %+v", err) // this will be 1-indexed, instead of zero-indexed
	}
}