lang: Add modern type unification implementation

This adds a modern type unification algorithm, which drastically improves performance, particularly for bigger programs. This required a change to the AST to add TypeCheck methods (for Stmt) and Infer/Check methods (for Expr). This also changed how the functions express their invariants, and as a result the function interfaces were changed as well. This greatly improves the way we express these invariants, and as a result it makes adding new polymorphic functions significantly easier. This also makes error output for the user a lot better in pretty much all scenarios. The one downside of this patch is that a good chunk of it is merged in this giant single commit since it was hard to do it step-wise. That's not the end of the world. This couldn't be done without the guidance of Sam who helped me in explaining, debugging, and writing all the sneaky algorithmic parts and much more. Thanks again Sam! Co-authored-by: Samuel Gélineau <gelisam@gmail.com>
2024-07-01 18:33:47 -04:00
parent 4e18c9c67a
commit 14577a0c46
102 changed files with 3722 additions and 11132 deletions
--- a/lang/interfaces/ast.go
+++ b/lang/interfaces/ast.go
@@ -79,10 +79,11 @@ type Stmt interface {
 	// SetScope sets the scope here and propagates it downwards.
 	SetScope(*Scope) error

-	// Unify returns the list of invariants that this node produces. It does
-	// so recursively on any children elements that exist in the AST, and
-	// returns the collection to the caller.
-	Unify() ([]Invariant, error)
+	// TypeCheck returns the list of invariants that this node produces. It
+	// does so recursively on any children elements that exist in the AST,
+	// and returns the collection to the caller. It calls TypeCheck for
+	// child statements, and Infer/Check for child expressions.
+	TypeCheck() ([]*UnificationInvariant, error)

 	// Graph returns the reactive function graph expressed by this node.
 	Graph() (*pgraph.Graph, error)
@@ -128,10 +129,21 @@ type Expr interface {
 	// determine it statically. This errors if it is not yet known.
 	Type() (*types.Type, error)

-	// Unify returns the list of invariants that this node produces. It does
-	// so recursively on any children elements that exist in the AST, and
-	// returns the collection to the caller.
-	Unify() ([]Invariant, error)
+	// Infer returns the type of itself and a collection of invariants. The
+	// returned type may contain unification variables. It collects the
+	// invariants by calling Check on its children expressions. In making
+	// those calls, it passes in the known type for that child to get it to
+	// "Check" it. When the type is not known, it should create a new
+	// unification variable to pass in to the child Check calls. Infer
+	// usually only calls Check on things inside of it, and often does not
+	// call another Infer.
+	Infer() (*types.Type, []*UnificationInvariant, error)
+
+	// Check verifies that the input type equals the type of the expression
+	// it is running on. In doing so, it adds any invariants that are
+	// necessary. Check must always call Infer to produce the invariant. The
+	// implementation can be generic for all expressions.
+	Check(typ *types.Type) ([]*UnificationInvariant, error)

 	// Graph returns the reactive function graph expressed by this node. It
 	// takes in the environment of any functions in scope. It also returns
--- a/lang/interfaces/func.go
+++ b/lang/interfaces/func.go
@@ -41,7 +41,10 @@ import (
 )

 // FuncSig is the simple signature that is used throughout our implementations.
-type FuncSig = func([]types.Value) (types.Value, error)
+type FuncSig = func(context.Context, []types.Value) (types.Value, error)
+
+// Compile-time guarantee that *types.FuncValue accepts a func of type FuncSig.
+var _ = &types.FuncValue{V: FuncSig(nil)}

 // Info is a static representation of some information about the function. It is
 // used for static analysis and type checking. If you break this contract, you
@@ -90,14 +93,16 @@ type Init struct {
 type Func interface {
 	fmt.Stringer // so that this can be stored as a Vertex

-	Validate() error // FIXME: this is only needed for PolyFunc. Get it moved and used!
+	// Validate ensures that our struct implementing this function was built
+	// correctly.
+	Validate() error

 	// Info returns some information about the function in question, which
 	// includes the function signature. For a polymorphic function, this
 	// might not be known until after Build was called. As a result, the
-	// sig should be allowed to return a partial or variant type if it is
-	// not known yet. This is because the Info method might be called
-	// speculatively to aid in type unification.
+	// sig should be allowed to return a type that includes unification
+	// variables if it is not known yet. This is because the Info method
+	// might be called speculatively to aid in type unification elsewhere.
 	Info() *Info

 	// Init passes some important values and references to the function.
@@ -113,9 +118,11 @@ type Func interface {
 	Stream(context.Context) error
 }

-// PolyFunc is an interface for functions which are statically polymorphic. In
-// other words, they are functions which before compile time are polymorphic,
-// but after a successful compilation have a fixed static signature. This makes
+// BuildableFunc is an interface for functions which need a Build or Check step.
+// These functions need that method called after type unification to tell them
+// the precise type and/or to check if it's a valid solution. These functions
+// are usually polymorphic before compile time. After a successful compilation,
+// every function, including these, must have a fixed static signature. This makes
 // implementing what would appear to be generic or polymorphic instead something
 // that is actually static and that still has the language safety properties.
 // Our engine requires that by the end of compilation, everything is static.
@@ -123,30 +130,13 @@ type Func interface {
 // their execution. If the types could change, then we wouldn't be able to
 // safely pass values around.
 //
-// NOTE: This interface is similar to OldPolyFunc, except that it uses a Unify
-// method that works differently than the original Polymorphisms method. This
-// allows us to build invariants that are used directly by the type unification
-// solver.
-type PolyFunc interface {
+// NOTE: This interface doesn't require any Infer/Check methods because simple
+// polymorphism can be achieved by having a type signature that contains
+// unification variables. Variants that require fancier extensions can implement
+// the InferableFunc interface as well.
+type BuildableFunc interface {
 	Func // implement everything in Func but add the additional requirements

-	// Unify returns the list of invariants that this func produces. It is a
-	// way for a polymorphic function to describe its type requirements. It
-	// would be expected for this function to return at least one
-	// ExclusiveInvariant or GeneratorInvariant, since these are two common
-	// mechanisms for polymorphic functions to describe their constraints.
-	// The important realization behind this method is that the collecting
-	// of possible invariants, must happen *before* the solver runs so that
-	// the solver can look at all the available logic *simultaneously* to
-	// find a solution if we want to be able to reliably solve for things.
-	// The input argument that it receives is the expression pointer that it
-	// is unifying against-- in other words, the pointer is its own handle.
-	// This is different than the `obj` reference of this function
-	// implementation because _that_ handle is not the object/pointer in the
-	// AST that we're discussing when performing type unification. Put
-	// another way: the Expr input is the ExprFunc, not the ExprCall.
-	Unify(Expr) ([]Invariant, error)
-
 	// Build takes the known or unified type signature for this function and
 	// finalizes this structure so that it is now determined, and ready to
 	// function as a normal function would. (The normal methods in the Func
@@ -159,52 +149,80 @@ type PolyFunc interface {
 	// will use. These are used when constructing the function graphs. This
 	// means that when this is called from SetType, it can set the correct
 	// type arg names, and this will also match what's in function Info().
+	// This can also be used as a "check" method to make sure that the
+	// unification result for this function is one of the valid
+	// possibilities. This can happen if the specified unification variables
+	// do not guarantee a valid type. (For example: the sig for the len()
+	// function is `func(?1) int`, but we can't build the function if ?1 is
+	// an int or a float. That is checked during Build.)
 	Build(*types.Type) (*types.Type, error)
 }

-// OldPolyFunc is an interface for functions which are statically polymorphic.
-// In other words, they are functions which before compile time are polymorphic,
-// but after a successful compilation have a fixed static signature. This makes
-// implementing what would appear to be generic or polymorphic instead something
-// that is actually static and that still has the language safety properties.
-type OldPolyFunc interface {
+// InferableFunc is an interface which extends the BuildableFunc interface by
+// adding a new method that can give the user more control over how function
+// inference runs. This allows the user to return more precise information for
+// type unification, using compile-time information, than would otherwise be
+// possible.
+//
+// NOTE: This is the third iteration of this interface which is now incredibly
+// well-polished.
+type InferableFunc interface { // TODO: Is there a better name for this?
+	BuildableFunc // includes Build and the base Func stuff...
+
+	// FuncInfer returns the type and the list of invariants that this func
+	// produces. That type may include unification variables. This is a
+	// fancy way for a polymorphic function to describe its type
+	// requirements. It uses compile-time information to help it build the
+	// correct signature and constraints. This compile time information is
+	// passed into this method as a list of partial "hints" that take the
+	// form of a (possibly partial) function type signature (with as many
+	// types in it specified and the rest set to nil) and any known static
+	// values for the input args. If the partial type is not nil, then the
+	// Ord parameter must be of the correct arg length. If any types are
+	// specified, then the array of partial values must be of that length as
+	// well, with the known ones filled in. Some static polymorphic
+	// functions require a minimal amount of hinting or they will be unable
+	// to return any possible unambiguous result. Remember that your result
+	// can include unification variables, but it should not be a standalone
+	// ?1 variable. It should at the minimum be of the form `func(?1) ?2`.
+	// Since this is almost always called by an ExprCall when building
+	// invariants for type unification, we'll know the precise number of
+	// args the function is being called with, so you can use this
+	// information to more correctly discern the correct function you want
+	// to build. The arg names in your returned func type signatures can be
+	// in the standardized "a..b..c" format. Use util.NumToAlpha if you want
+	// to convert easily. These arg names will be replaced by the correct
+	// ones during the Build step. All of these features and limitations are
+	// this way so that we can use the standard Union-Find type unification
+	// algorithm which runs fairly quickly.
+	// TODO: Do we ever need to return any invariants?
+	FuncInfer(partialType *types.Type, partialValues []types.Value) (*types.Type, []*UnificationInvariant, error)
+}
+
+// CopyableFunc is an interface which extends the base Func interface with the
+// ability to tell our compiler how to copy a Func, for those functions which
+// need to control how they are copied.
+type CopyableFunc interface {
 	Func // implement everything in Func but add the additional requirements

-	// Polymorphisms returns a list of possible function type signatures. It
-	// takes as input a list of partial "hints" as to limit the number of
-	// possible results it returns. These partial hints take the form of a
-	// function type signature (with as many types in it specified and the
-	// rest set to nil) and any known static values for the input args. If
-	// the partial type is not nil, then the Ord parameter must be of the
-	// correct arg length. If any types are specified, then the array must
-	// be of that length as well, with the known ones filled in. Some
-	// static polymorphic functions require a minimal amount of hinting or
-	// they will be unable to return any possible result that is not
-	// infinite in length. If you expect to need to return an infinite (or
-	// very large) amount of results, then you should return an error
-	// instead. The arg names in your returned func type signatures should
-	// be in the standardized "a..b..c" format. Use util.NumToAlpha if you
-	// want to convert easily.
-	Polymorphisms(*types.Type, []types.Value) ([]*types.Type, error)
-
-	// Build takes the known or unified type signature for this function and
-	// finalizes this structure so that it is now determined, and ready to
-	// function as a normal function would. (The normal methods in the Func
-	// interface are all that should be needed or used after this point.)
-	// Of note, the names of the specific input args shouldn't matter as
-	// long as they are unique. Their position doesn't matter. This is so
-	// that unification can use "arg0", "arg1", "argN"... if they can't be
-	// determined statically. Build can transform them into it's desired
-	// form, and must return the type (with the correct arg names) that it
-	// will use. These are used when constructing the function graphs. This
-	// means that when this is called from SetType, it can set the correct
-	// type arg names, and this will also match what's in function Info().
-	Build(*types.Type) (*types.Type, error)
+	// Copy is used because we sometimes copy the ExprFunc with its Copy
+	// method because we're using the same ExprFunc in two places, and it
+	// might have a different type and type unification needs to solve for
+	// it in more than one way. It also turns out that some functions such
+	// as the struct lookup function store information that they learned
+	// during `FuncInfer`, and as a result, if we re-build this, then we
+	// lose that information and the function can then fail during `Build`.
+	// As a result, those functions can implement a `Copy` method which we
+	// will use instead, so they can preserve any internal state that they
+	// would like to keep.
+	Copy() Func
 }

 // NamedArgsFunc is a function that uses non-standard function arg names. If you
 // don't implement this, then the argnames (if specified) must correspond to the
 // a, b, c...z, aa, ab...az, ba...bz, and so on sequence.
+// XXX: I expect that we can get rid of this since type unification doesn't care
+// what the arguments are named, and at the end, we get them from Info or Build.
 type NamedArgsFunc interface {
 	Func // implement everything in Func but add the additional requirements

--- a/lang/interfaces/structs.go
+++ b/lang/interfaces/structs.go
@@ -1,206 +0,0 @@
-// Mgmt
-// Copyright (C) 2013-2024+ James Shubin and the project contributors
-// Written by James Shubin <james@shubin.ca> and the project contributors
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program.  If not, see <https://www.gnu.org/licenses/>.
-//
-// Additional permission under GNU GPL version 3 section 7
-//
-// If you modify this program, or any covered work, by linking or combining it
-// with embedded mcl code and modules (and that the embedded mcl code and
-// modules which link with this program, contain a copy of their source code in
-// the authoritative form) containing parts covered by the terms of any other
-// license, the licensors of this program grant you additional permission to
-// convey the resulting work. Furthermore, the licensors of this program grant
-// the original author, James Shubin, additional permission to update this
-// additional permission if he deems it necessary to achieve the goals of this
-// additional permission.
-
-package interfaces
-
-import (
-	"fmt"
-
-	"github.com/purpleidea/mgmt/lang/types"
-	"github.com/purpleidea/mgmt/pgraph"
-)
-
-// ExprAny is a placeholder expression that is used for type unification hacks.
-type ExprAny struct {
-	typ *types.Type
-
-	V types.Value // stored value (set with SetValue)
-}
-
-// String returns a short representation of this expression.
-func (obj *ExprAny) String() string { return "any" }
-
-// Apply is a general purpose iterator method that operates on any AST node. It
-// is not used as the primary AST traversal function because it is less readable
-// and easy to reason about than manually implementing traversal for each node.
-// Nevertheless, it is a useful facility for operations that might only apply to
-// a select number of node types, since they won't need extra noop iterators...
-func (obj *ExprAny) Apply(fn func(Node) error) error { return fn(obj) }
-
-// Init initializes this branch of the AST, and returns an error if it fails to
-// validate.
-func (obj *ExprAny) Init(*Data) error { return nil }
-
-// Interpolate returns a new node (aka a copy) once it has been expanded. This
-// generally increases the size of the AST when it is used. It calls Interpolate
-// on any child elements and builds the new node with those new node contents.
-// Here it simply returns itself, as no interpolation is possible.
-func (obj *ExprAny) Interpolate() (Expr, error) {
-	return &ExprAny{
-		typ: obj.typ,
-		V:   obj.V,
-	}, nil
-}
-
-// Copy returns a light copy of this struct. Anything static will not be copied.
-func (obj *ExprAny) Copy() (Expr, error) {
-	return obj, nil // always static
-}
-
-// Ordering returns a graph of the scope ordering that represents the data flow.
-// This can be used in SetScope so that it knows the correct order to run it in.
-func (obj *ExprAny) Ordering(produces map[string]Node) (*pgraph.Graph, map[Node]string, error) {
-	graph, err := pgraph.NewGraph("ordering")
-	if err != nil {
-		return nil, nil, err
-	}
-	graph.AddVertex(obj)
-
-	cons := make(map[Node]string)
-	return graph, cons, nil
-}
-
-// SetScope does nothing for this struct, because it has no child nodes, and it
-// does not need to know about the parent scope.
-func (obj *ExprAny) SetScope(*Scope, map[string]Expr) error { return nil }
-
-// SetType is used to set the type of this expression once it is known. This
-// usually happens during type unification, but it can also happen during
-// parsing if a type is specified explicitly. Since types are static and don't
-// change on expressions, if you attempt to set a different type than what has
-// previously been set (when not initially known) this will error.
-func (obj *ExprAny) SetType(typ *types.Type) error {
-	if obj.typ != nil {
-		return obj.typ.Cmp(typ) // if not set, ensure it doesn't change
-	}
-	if obj.V != nil {
-		// if there's a value already, ensure the types are the same...
-		if err := obj.V.Type().Cmp(typ); err != nil {
-			return err
-		}
-	}
-	obj.typ = typ // set
-	return nil
-}
-
-// Type returns the type of this expression.
-func (obj *ExprAny) Type() (*types.Type, error) {
-	if obj.typ == nil && obj.V == nil {
-		return nil, ErrTypeCurrentlyUnknown
-	}
-	if obj.typ != nil && obj.V != nil {
-		if err := obj.V.Type().Cmp(obj.typ); err != nil {
-			return nil, err
-		}
-		return obj.typ, nil
-	}
-	if obj.V != nil {
-		return obj.V.Type(), nil
-	}
-	return obj.typ, nil
-}
-
-// Unify returns the list of invariants that this node produces. It recursively
-// calls Unify on any children elements that exist in the AST, and returns the
-// collection to the caller.
-func (obj *ExprAny) Unify() ([]Invariant, error) {
-	invariants := []Invariant{
-		&AnyInvariant{ // it has to be something, anything!
-			Expr: obj,
-		},
-	}
-	// TODO: should we return an EqualsInvariant with obj.typ ?
-	// TODO: should we return a ValueInvariant with obj.V ?
-	return invariants, nil
-}
-
-// Func returns the reactive stream of values that this expression produces.
-func (obj *ExprAny) Func() (Func, error) {
-	//	// XXX: this could be a list too, so improve this code or improve the subgraph code...
-	//	return &structs.ConstFunc{
-	//		Value: obj.V,
-	//	}
-
-	return nil, fmt.Errorf("programming error using ExprAny") // this should not be called
-}
-
-// Graph returns the reactive function graph which is expressed by this node. It
-// includes any vertices produced by this node, and the appropriate edges to any
-// vertices that are produced by its children. Nodes which fulfill the Expr
-// interface directly produce vertices (and possible children) where as nodes
-// that fulfill the Stmt interface do not produces vertices, where as their
-// children might. This returns a graph with a single vertex (itself) in it, and
-// the edges from all of the child graphs to this.
-func (obj *ExprAny) Graph(env map[string]Func) (*pgraph.Graph, Func, error) {
-	graph, err := pgraph.NewGraph("any")
-	if err != nil {
-		return nil, nil, err
-	}
-	function, err := obj.Func()
-	if err != nil {
-		return nil, nil, err
-	}
-	graph.AddVertex(function)
-
-	return graph, function, nil
-}
-
-// SetValue here is a no-op, because algorithmically when this is called from
-// the func engine, the child elements (the list elements) will have had this
-// done to them first, and as such when we try and retrieve the set value from
-// this expression by calling `Value`, it will build it from scratch!
-
-// SetValue here is used to store a value for this expression node. This value
-// is cached and can be retrieved by calling Value.
-func (obj *ExprAny) SetValue(value types.Value) error {
-	typ := value.Type()
-	if obj.typ != nil {
-		if err := obj.typ.Cmp(typ); err != nil {
-			return err
-		}
-	}
-	obj.typ = typ
-	obj.V = value
-	return nil
-}
-
-// Value returns the value of this expression in our type system. This will
-// usually only be valid once the engine has run and values have been produced.
-// This might get called speculatively (early) during unification to learn more.
-func (obj *ExprAny) Value() (types.Value, error) {
-	if obj.V == nil {
-		return nil, fmt.Errorf("value is not set")
-	}
-	return obj.V, nil
-}
-
-// ScopeGraph adds nodes and vertices to the supplied graph.
-func (obj *ExprAny) ScopeGraph(g *pgraph.Graph) {
-	g.AddVertex(obj)
-}
--- a/lang/interfaces/unification.go
+++ b/lang/interfaces/unification.go