From 2bc23c468e4208345e953347245250d800699f06 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Mon, 25 Sep 2023 18:22:22 -0400 Subject: [PATCH] lang: funcs: core: iter: Finish map function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was the goal all along. Proper iteration without for loops. Yay! Co-authored-by: Samuel Gélineau --- examples/lang/map-iterator0.mcl | 13 ++ examples/lang/map-iterator1.mcl | 28 ++++ examples/lang/map-iterator2.mcl | 49 ++++++ lang/funcs/core/iter/map_func.go | 278 +++++++++++++++++++++++++------ 4 files changed, 319 insertions(+), 49 deletions(-) create mode 100644 examples/lang/map-iterator0.mcl create mode 100644 examples/lang/map-iterator1.mcl create mode 100644 examples/lang/map-iterator2.mcl diff --git a/examples/lang/map-iterator0.mcl b/examples/lang/map-iterator0.mcl new file mode 100644 index 00000000..316555ca --- /dev/null +++ b/examples/lang/map-iterator0.mcl @@ -0,0 +1,13 @@ +import "iter" + +$fn = func($x) { # notable because concrete type is fn(t1) t2, where t1 != t2 + len($x) +} + +$in1 = ["a", "bb", "ccc", "dddd", "eeeee",] + +$out1 = iter.map($in1, $fn) + +$t1 = template("out1: {{ . }}", $out1) + +test $t1 {} diff --git a/examples/lang/map-iterator1.mcl b/examples/lang/map-iterator1.mcl new file mode 100644 index 00000000..86165229 --- /dev/null +++ b/examples/lang/map-iterator1.mcl @@ -0,0 +1,28 @@ +import "datetime" +import "iter" +import "math" + +$now = datetime.now() + +# alternate every four seconds +$mod0 = math.mod($now, 8) == 0 +$mod1 = math.mod($now, 8) == 1 +$mod2 = math.mod($now, 8) == 2 +$mod3 = math.mod($now, 8) == 3 +$mod = $mod0 || $mod1 || $mod2 || $mod3 + +$fn = func($x) { # notable because concrete type is fn(t1) t2, where t1 != t2 + len($x) +} + +$in1 = if $mod { + ["a", "bb", "ccc", "dddd", "eeeee",] +} else { + ["ffffff", "ggggggg", "hhhhhhhh", "iiiiiiiii", "jjjjjjjjjj",] +} + +$out1 = iter.map($in1, $fn) + +$t1 = template("out1: {{ . }}", $out1) + +test $t1 {} diff --git a/examples/lang/map-iterator2.mcl b/examples/lang/map-iterator2.mcl new file mode 100644 index 00000000..917d24f5 --- /dev/null +++ b/examples/lang/map-iterator2.mcl @@ -0,0 +1,49 @@ +import "datetime" +import "iter" +import "math" + +$now = datetime.now() + +# alternate every four seconds +$mod0 = math.mod($now, 8) == 0 +$mod1 = math.mod($now, 8) == 1 +$mod2 = math.mod($now, 8) == 2 +$mod3 = math.mod($now, 8) == 3 +$moda = $mod0 || $mod1 || $mod2 || $mod3 + +$mod4 = math.mod($now, 8) == 4 +$mod5 = math.mod($now, 8) == 5 +$mod6 = math.mod($now, 8) == 6 +$mod7 = math.mod($now, 8) == 7 +$modb = $mod4 || $mod5 || $mod6 || $mod7 + +$fn = if $moda { + func($x) { # notable because concrete type is fn(t1) t2, where t1 != t2 + len($x) + } +} else { + func($x) { # notable because concrete type is fn(t1) t2, where t1 != t2 + -1*len($x) + } +} + +$in1 = if $modb { + ["a", "bb", "ccc", "dddd", "eeeee",] +} else { + ["ffffff", "ggggggg", "hhhhhhhh", "iiiiiiiii", "jjjjjjjjjj",] +} + +$out1 = iter.map($in1, $fn) + +$t1 = template("out1: {{ . }}", $out1) + +test $t1 {} + +file "/tmp/mgmt/map" { + state => $const.res.file.state.exists, + content => $t1, +} + +file "/tmp/mgmt/" { + state => $const.res.file.state.exists, +} diff --git a/lang/funcs/core/iter/map_func.go b/lang/funcs/core/iter/map_func.go index 28f16f48..c2bd8043 100644 --- a/lang/funcs/core/iter/map_func.go +++ b/lang/funcs/core/iter/map_func.go @@ -22,8 +22,10 @@ import ( "fmt" "github.com/purpleidea/mgmt/lang/funcs" + "github.com/purpleidea/mgmt/lang/funcs/structs" "github.com/purpleidea/mgmt/lang/interfaces" "github.com/purpleidea/mgmt/lang/types" + "github.com/purpleidea/mgmt/lang/types/full" "github.com/purpleidea/mgmt/util" "github.com/purpleidea/mgmt/util/errwrap" ) @@ -31,16 +33,17 @@ import ( const ( // MapFuncName is the name this function is registered as. MapFuncName = "map" + + // arg names... + mapArgNameInputs = "inputs" + mapArgNameFunction = "function" ) func init() { funcs.ModuleRegister(ModuleName, MapFuncName, func() interfaces.Func { return &MapFunc{} }) // must register the func and name } -const ( - argNameInputs = "inputs" - argNameFunction = "function" -) +var _ interfaces.PolyFunc = &MapFunc{} // ensure it meets this expectation // MapFunc is the standard map iterator function that applies a function to each // element in a list. It returns a list with the same number of elements as the @@ -59,10 +62,16 @@ type MapFunc struct { init *interfaces.Init last types.Value // last value received to use for diff - inputs types.Value - function func([]types.Value) (types.Value, error) + lastFuncValue *full.FuncValue // remember the last function value + lastInputListLength int // remember the last input list length - result types.Value // last calculated output + inputListType *types.Type + outputListType *types.Type + + // outputChan is an initially-nil channel from which we receive output + // lists from the subgraph. This channel is reset when the subgraph is + // recreated. + outputChan chan types.Value } // String returns a simple name for this function. This is needed so this struct @@ -73,7 +82,7 @@ func (obj *MapFunc) String() string { // ArgGen returns the Nth arg name for this function. func (obj *MapFunc) ArgGen(index int) (string, error) { - seq := []string{argNameInputs, argNameFunction} // inverted for pretty! + seq := []string{mapArgNameInputs, mapArgNameFunction} // inverted for pretty! if l := len(seq); index >= l { return "", fmt.Errorf("index %d exceeds arg length of %d", index, l) } @@ -439,7 +448,7 @@ func (obj *MapFunc) Polymorphisms(partialType *types.Type, partialValues []types tI := types.NewType(fmt.Sprintf("[]%s", t1.String())) // in tO := types.NewType(fmt.Sprintf("[]%s", t2.String())) // out tF := types.NewType(fmt.Sprintf("func(%s) %s", t1.String(), t2.String())) - s := fmt.Sprintf("func(%s %s, %s %s) %s", argNameInputs, tI, argNameFunction, tF, tO) + s := fmt.Sprintf("func(%s %s, %s %s) %s", mapArgNameInputs, tI, mapArgNameFunction, tF, tO) typ := types.NewType(s) // yay! // TODO: type check that the partialValues are compatible @@ -552,80 +561,251 @@ func (obj *MapFunc) sig() *types.Type { tO := types.NewType(fmt.Sprintf("[]%s", tOi.String())) // return type // type of 1st arg (the function) - tF := types.NewType(fmt.Sprintf("func(%s) %s", tIi.String(), tOi.String())) + tF := types.NewType(fmt.Sprintf("func(%s %s) %s", "name-which-can-vary-over-time", tIi.String(), tOi.String())) - s := fmt.Sprintf("func(%s %s, %s %s) %s", argNameInputs, tI, argNameFunction, tF, tO) + s := fmt.Sprintf("func(%s %s, %s %s) %s", mapArgNameInputs, tI, mapArgNameFunction, tF, tO) return types.NewType(s) // yay! } // Init runs some startup code for this function. func (obj *MapFunc) Init(init *interfaces.Init) error { obj.init = init + obj.lastFuncValue = nil + obj.lastInputListLength = -1 + + obj.inputListType = types.NewType(fmt.Sprintf("[]%s", obj.Type)) + obj.outputListType = types.NewType(fmt.Sprintf("[]%s", obj.RType)) + return nil } // Stream returns the changing values that this func has over time. func (obj *MapFunc) Stream(ctx context.Context) error { + // Every time the FuncValue or the length of the list changes, recreate the + // subgraph, by calling the FuncValue N times on N nodes, each of which + // extracts one of the N values in the list. + defer close(obj.init.Output) // the sender closes - rtyp := types.NewType(fmt.Sprintf("[]%s", obj.RType.String())) + + // A Func to send input lists to the subgraph. The Txn.Erase() call ensures + // that this Func is not removed when the subgraph is recreated, so that the + // function graph can propagate the last list we received to the subgraph. + inputChan := make(chan types.Value) + subgraphInput := &structs.ChannelBasedSourceFunc{ + Name: "subgraphInput", + Source: obj, + Chan: inputChan, + Type: obj.inputListType, + } + obj.init.Txn.AddVertex(subgraphInput) + if err := obj.init.Txn.Commit(); err != nil { + return errwrap.Wrapf(err, "commit error in Stream") + } + obj.init.Txn.Erase() // prevent the next Reverse() from removing subgraphInput + defer func() { + close(inputChan) + obj.init.Txn.Reverse() + obj.init.Txn.DeleteVertex(subgraphInput) + obj.init.Txn.Commit() + }() + + obj.outputChan = nil + + canReceiveMoreFuncValuesOrInputLists := true + canReceiveMoreOutputLists := true for { + + if !canReceiveMoreFuncValuesOrInputLists && !canReceiveMoreOutputLists { + //break + return nil + } + select { case input, ok := <-obj.init.Input: if !ok { - obj.init.Input = nil // don't infinite loop back - continue // no more inputs, but don't return! + obj.init.Input = nil // block looping back here + canReceiveMoreFuncValuesOrInputLists = false + continue } - //if err := input.Type().Cmp(obj.Info().Sig.Input); err != nil { - // return errwrap.Wrapf(err, "wrong function input") - //} if obj.last != nil && input.Cmp(obj.last) == nil { continue // value didn't change, skip it } obj.last = input // store for next - function := input.Struct()[argNameFunction].Func() // func([]Value) (Value, error) - //if function == obj.function { // TODO: how can we cmp? - // continue // nothing changed - //} - obj.function = function - - inputs := input.Struct()[argNameInputs] - if obj.inputs != nil && obj.inputs.Cmp(inputs) == nil { - continue // nothing changed + value, exists := input.Struct()[mapArgNameFunction] + if !exists { + return fmt.Errorf("programming error, can't find edge") } - obj.inputs = inputs - // run the function on each index - output := []types.Value{} - for ix, v := range inputs.List() { // []Value - args := []types.Value{v} // only one input arg! - x, err := function(args) - if err != nil { - return errwrap.Wrapf(err, "error running map function on index %d", ix) + newFuncValue, ok := value.(*full.FuncValue) + if !ok { + return fmt.Errorf("programming error, can't convert to *FuncValue") + } + + newInputList, exists := input.Struct()[mapArgNameInputs] + if !exists { + return fmt.Errorf("programming error, can't find edge") + } + + // If we have a new function or the length of the input + // list has changed, then we need to replace the + // subgraph with a new one that uses the new function + // the correct number of times. + + // It's important to have this compare step to avoid + // redundant graph replacements which slow things down, + // but also cause the engine to lock, which can preempt + // the process scheduler, which can cause duplicate or + // unnecessary re-sending of values here, which causes + // the whole process to repeat ad-nauseum. + n := len(newInputList.List()) + if newFuncValue != obj.lastFuncValue || n != obj.lastInputListLength { + obj.lastFuncValue = newFuncValue + obj.lastInputListLength = n + // replaceSubGraph uses the above two values + if err := obj.replaceSubGraph(subgraphInput); err != nil { + return errwrap.Wrapf(err, "could not replace subgraph") } - - output = append(output, x) - } - result := &types.ListValue{ - V: output, - T: rtyp, + canReceiveMoreOutputLists = true } - if obj.result != nil && obj.result.Cmp(result) == nil { - continue // result didn't change + // send the new input list to the subgraph + select { + case inputChan <- newInputList: + case <-ctx.Done(): + return nil } - obj.result = result // store new result - case <-ctx.Done(): - return nil - } + case outputList, ok := <-obj.outputChan: + // send the new output list downstream + if !ok { + obj.outputChan = nil + canReceiveMoreOutputLists = false + continue + } + + select { + case obj.init.Output <- outputList: + case <-ctx.Done(): + return nil + } - select { - case obj.init.Output <- obj.result: // send - // pass case <-ctx.Done(): return nil } } } + +func (obj *MapFunc) replaceSubGraph(subgraphInput interfaces.Func) error { + // Create a subgraph which splits the input list into 'n' nodes, applies + // 'newFuncValue' to each, then combines the 'n' outputs back into a list. + // + // Here is what the subgraph looks like: + // + // digraph { + // "subgraphInput" -> "inputElemFunc0" + // "subgraphInput" -> "inputElemFunc1" + // "subgraphInput" -> "inputElemFunc2" + // + // "inputElemFunc0" -> "outputElemFunc0" + // "inputElemFunc1" -> "outputElemFunc1" + // "inputElemFunc2" -> "outputElemFunc2" + // + // "outputElemFunc0" -> "outputListFunc" + // "outputElemFunc1" -> "outputListFunc" + // "outputElemFunc1" -> "outputListFunc" + // + // "outputListFunc" -> "subgraphOutput" + // } + + const channelBasedSinkFuncArgNameEdgeName = structs.ChannelBasedSinkFuncArgName // XXX: not sure if the specific name matters. + + // delete the old subgraph + if err := obj.init.Txn.Reverse(); err != nil { + return errwrap.Wrapf(err, "could not Reverse") + } + + // create the new subgraph + + obj.outputChan = make(chan types.Value) + subgraphOutput := &structs.ChannelBasedSinkFunc{ + Name: "subgraphOutput", + Target: obj, + EdgeName: channelBasedSinkFuncArgNameEdgeName, + Chan: obj.outputChan, + Type: obj.outputListType, + } + obj.init.Txn.AddVertex(subgraphOutput) + + m := make(map[string]*types.Type) + ord := []string{} + for i := 0; i < obj.lastInputListLength; i++ { + argName := fmt.Sprintf("outputElem%d", i) + m[argName] = obj.RType + ord = append(ord, argName) + } + typ := &types.Type{ + Kind: types.KindFunc, + Map: m, + Ord: ord, + Out: obj.outputListType, + } + outputListFunc := structs.SimpleFnToDirectFunc( + "mapOutputList", + &types.FuncValue{ + V: func(args []types.Value) (types.Value, error) { + listValue := &types.ListValue{ + V: args, + T: obj.outputListType, + } + + return listValue, nil + }, + T: typ, + }, + ) + + obj.init.Txn.AddVertex(outputListFunc) + obj.init.Txn.AddEdge(outputListFunc, subgraphOutput, &interfaces.FuncEdge{ + Args: []string{channelBasedSinkFuncArgNameEdgeName}, + }) + + for i := 0; i < obj.lastInputListLength; i++ { + i := i + inputElemFunc := structs.SimpleFnToDirectFunc( + fmt.Sprintf("mapInputElem[%d]", i), + &types.FuncValue{ + V: func(args []types.Value) (types.Value, error) { + if len(args) != 1 { + return nil, fmt.Errorf("inputElemFunc: expected a single argument") + } + arg := args[0] + + list, ok := arg.(*types.ListValue) + if !ok { + return nil, fmt.Errorf("inputElemFunc: expected a ListValue argument") + } + + return list.List()[i], nil + }, + T: types.NewType(fmt.Sprintf("func(inputList %s) %s", obj.inputListType, obj.Type)), + }, + ) + obj.init.Txn.AddVertex(inputElemFunc) + + outputElemFunc, err := obj.lastFuncValue.Call(obj.init.Txn, []interfaces.Func{inputElemFunc}) + if err != nil { + return errwrap.Wrapf(err, "could not call obj.lastFuncValue.Call()") + } + + obj.init.Txn.AddEdge(subgraphInput, inputElemFunc, &interfaces.FuncEdge{ + Args: []string{"inputList"}, + }) + obj.init.Txn.AddEdge(outputElemFunc, outputListFunc, &interfaces.FuncEdge{ + Args: []string{fmt.Sprintf("outputElem%d", i)}, + }) + } + + return obj.init.Txn.Commit() +}