feature complete now: dont use flags, generalize parse args, support …

…slices, OpenFS
emer · Jul 4, 2023 · 8eb49b2 · 8eb49b2
1 parent b1bd493
commit 8eb49b2
Show file tree

Hide file tree

Showing 6 changed files with 159 additions and 89 deletions.
diff --git a/econfig/README.md b/econfig/README.md
@@ -1,11 +1,5 @@
 Docs: [GoDoc](https://pkg.go.dev/github.com/emer/emergent/econfig)
 
-TODO:
-* slice fields
-* OpenFS
-* flag args are conflicting with config args -- just support all flags directly.
-
-
 `econfig` provides methods to set values on a `Config` struct through a (TOML) config file or command-line args (`flags` in Go terminology), with support for setting Network params and values on any other struct as well (e.g., an Env to be constructed later in a ConfigEnv method).
 
 * Standard usage:
@@ -34,55 +28,66 @@ TODO:
 
 # Special fields, supported types, and field tags
 
-* A limited number of standard field types are supported, consistent with emer neural network usage:
-    + `bool` and `[]bool`
-    + `float32` and `[]float32`
-    + `int` and `[]int`
-    + `string` and `[]string`
-    + [kit](https://github.com/goki/ki) registered "enum" `const` types, with names automatically parsed from string values (including | bit flags).  Must use the [goki stringer](https://github.com/goki/stringer) version to generate `FromString()` method, and register the type like this: `var KiT_TestEnum = kit.Enums.AddEnum(TestEnumN, kit.NotBitFlag, nil)` -- see [enum.go](enum.go) file for example.
-
 * To enable include file processing, add a `Includes []string` field and a `func (cfg *Config) IncludesPtr() *[]string { return &cfg.Includes }` method.  The include file(s) are read first before the current one.  A stack of such includes is created and processed in the natural order encountered, so each includer is applied after the includees, recursively.  Note: use `--config` to specify the first config file read -- the `Includes` field is excluded from arg processing because it would be processed _after_ the point where include files are processed.
 
 * `Field map[string]any` -- allows raw parsing of values that can be applied later.  Use this for `Network`, `Env` etc fields.
 
 * Field tag `def:"value"`, used in the [GoGi](https://github.com/goki/gi) GUI, sets the initial default value and is shown for the `-h` or `--help` usage info.
 
-# Standard Config
+* [kit](https://github.com/goki/ki) registered "enum" `const` types, with names automatically parsed from string values (including bit flags).  Must use the [goki stringer](https://github.com/goki/stringer) version to generate `FromString()` method, and register the type like this: `var KitTestEnum = kit.Enums.AddEnum(TestEnumN, kit.NotBitFlag, nil)` -- see [enum.go](enum.go) file for example.
 
-Here's a standard `Config` struct, corresponding to the `AddStd` args from `ecmd`, which can be used as a starting point.
+# Standard Config Example
+
+Here's the `Config` struct from [axon/examples/ra25](https://github.com/emer/axon), which can provide a useful starting point.  It uses Params, Run and Log sub-structs to better organize things.  For sims with extensive Env config, that should be added as a separate sub-struct as well.  The `view:"add-fields"` struct tag shows all of the fields in one big dialog in the GUI -- if you want separate ones, omit that.
 
 ```Go
+// ParamConfig has config parameters related to sim params
+type ParamConfig struct {
+	Network map[string]any `desc:"network parameters"`
+	Set     string         `desc:"ParamSet name to use -- must be valid name as listed in compiled-in params or loaded params"`
+	File    string         `desc:"Name of the JSON file to input saved parameters from."`
+	Tag     string         `desc:"extra tag to add to file names and logs saved from this run"`
+	Note    string         `desc:"user note -- describe the run params etc -- like a git commit message for the run"`
+	SaveAll bool           `desc:"Save all current param and config files (params_cur.toml, params_layers.txt, params_prjns.txt, config_cur.toml) then quit"`
+}
+
+// RunConfig has config parameters related to running the sim
+type RunConfig struct {
+	GPU          bool   `desc:"use the GPU for computation -- generally faster even for small models if NData ~16"`
+	Threads      int    `def:"0" desc:"number of parallel threads for CPU computation -- 0 = use default"`
+	Run          int    `def:"0" desc:"starting run number -- determines the random seed -- runs counts from there -- can do all runs in parallel by launching separate jobs with each run, runs = 1"`
+	Runs         int    `def:"5" min:"1" desc:"total number of runs to do when running Train"`
+	Epochs       int    `def:"100" desc:"total number of epochs per run"`
+	NZero        int    `def:"2" desc:"stop run after this number of perfect, zero-error epochs"`
+	NTrials      int    `def:"32" desc:"total number of trials per epoch.  Should be an even multiple of NData."`
+	NData        int    `def:"16" min:"1" desc:"number of data-parallel items to process in parallel per trial -- works (and is significantly faster) for both CPU and GPU.  Results in an effective mini-batch of learning."`
+	TestInterval int    `def:"5" desc:"how often to run through all the test patterns, in terms of training epochs -- can use 0 or -1 for no testing"`
+	PCAInterval  int    `def:"5" desc:"how frequently (in epochs) to compute PCA on hidden representations to measure variance?"`
+	StartWts     string `desc:"if non-empty, is the name of weights file to load at start of first run -- for testing"`
+}
+
+// LogConfig has config parameters related to logging data
+type LogConfig struct {
+	SaveWts      bool `desc:"if true, save final weights after each run"`
+	EpochLog     bool `def:"true" desc:"if true, save train epoch log to file, as .epc.tsv typically"`
+	RunLog       bool `def:"true" desc:"if true, save run log to file, as .run.tsv typically"`
+	TrialLog     bool `def:"false" desc:"if true, save train trial log to file, as .trl.tsv typically. May be large."`
+	TestEpochLog bool `def:"false" desc:"if true, save testing epoch log to file, as .tst_epc.tsv typically.  In general it is better to copy testing items over to the training epoch log and record there."`
+	TestTrialLog bool `def:"false" desc:"if true, save testing trial log to file, as .tst_trl.tsv typically. May be large."`
+	NetData      bool `desc:"if true, save network activation etc data from testing trials, for later viewing in netview"`
+}
+
 // Config is a standard Sim config -- use as a starting point.
-// don't forget to update defaults, delete unused fields, etc.
 type Config struct {
-	Includes     []string       `desc:"specify include files here, and after configuration, it contains list of include files added"`
-	GUI          bool           `def:"true" desc:"open the GUI -- does not automatically run -- if false, then runs automatically and quits"`
-	GPU          bool           `desc:"use the GPU for computation"`
-	Debug        bool           `desc:"log debugging information"`
-	Network      map[string]any `desc:"network parameters"`
-	ParamSet     string         `desc:"ParamSet name to use -- must be valid name as listed in compiled-in params or loaded params"`
-	ParamFile    string         `desc:"Name of the JSON file to input saved parameters from."`
-	ParamDocFile string         `desc:"Name of the file to output all parameter data. If not empty string, program should write file(s) and then exit"`
-	Tag          string         `desc:"extra tag to add to file names and logs saved from this run"`
-	Note         string         `desc:"user note -- describe the run params etc -- like a git commit message for the run"`
-	Run          int            `def:"0" desc:"starting run number -- determines the random seed -- runs counts from there -- can do all runs in parallel by launching separate jobs with each run, runs = 1"`
-	Runs         int            `def:"10" desc:"total number of runs to do when running Train"`
-	Epochs       int            `def:"100" desc:"total number of epochs per run"`
-	NTrials      int            `def:"128" desc:"total number of trials per epoch.  Should be an even multiple of NData."`
-	NData        int            `def:"16" desc:"number of data-parallel items to process in parallel per trial -- works (and is significantly faster) for both CPU and GPU.  Results in an effective mini-batch of learning."`
-	TestInterval int            `def:"5" desc:"how often to run through all the test patterns, in terms of training epochs -- can use 0 or -1 for no testing"`
-	PCAInterval  int            `def:"5" desc:"how frequently (in epochs) to compute PCA on hidden representations to measure variance?"`
-	SaveWts      bool           `desc:"if true, save final weights after each run"`
-	EpochLog     bool           `def:"true" desc:"if true, save train epoch log to file, as .epc.tsv typically"`
-	RunLog       bool           `def:"true" desc:"if true, save run log to file, as .run.tsv typically"`
-	TrialLog     bool           `def:"true" desc:"if true, save train trial log to file, as .trl.tsv typically. May be large."`
-	TestEpochLog bool           `def:"false" desc:"if true, save testing epoch log to file, as .tst_epc.tsv typically.  In general it is better to copy testing items over to the training epoch log and record there."`
-	TestTrialLog bool           `def:"false" desc:"if true, save testing trial log to file, as .tst_trl.tsv typically. May be large."`
-	NetData      bool           `desc:"if true, save network activation etc data from testing trials, for later viewing in netview"`
+	Includes []string    `desc:"specify include files here, and after configuration, it contains list of include files added"`
+	GUI      bool        `def:"true" desc:"open the GUI -- does not automatically run -- if false, then runs automatically and quits"`
+	Debug    bool        `desc:"log debugging information"`
+	Params   ParamConfig `view:"add-fields" desc:"parameter related configuration options"`
+	Run      RunConfig   `view:"add-fields" desc:"sim running related configuration options"`
+	Log      LogConfig   `view:"add-fields" desc:"data logging related configuration options"`
 }
 
 func (cfg *Config) IncludesPtr() *[]string { return &cfg.Includes }
-
 ```    
 
 # Key design considerations
@@ -91,7 +96,7 @@ func (cfg *Config) IncludesPtr() *[]string { return &cfg.Includes }
     + current axon models only support args. obelisk models only support TOML.  conflicts happen.
 
 * Sims use a Config struct with fields that represent the definitive values of all arg / config settings (vs a `map[string]interface{}`)
-    + struct provides _compile time_ error checking -- very important and precludes map.
+    + struct provides _compile time_ error checking (and IDE completion) -- very important and precludes map.
     + Add Config to Sim so it is visible in the GUI for easy visual debugging etc (current args map is organized by types -- makes it hard to see everything).
 
 * Enable setting Network or Env params directly:

diff --git a/econfig/args.go b/econfig/args.go
@@ -25,48 +25,53 @@ import (
 // must refer to fields in the config, so any that fail to match trigger
 // an error.  Errors can also result from parsing.
 // Errors are automatically logged because these are user-facing.
-func SetFromArgs(cfg any, args []string) (leftovers []string, err error) {
-	leftovers, err = parseArgs(cfg, args)
+func SetFromArgs(cfg any, args []string) (nonFlags []string, err error) {
+	allArgs := make(map[string]reflect.Value)
+	CommandArgs(allArgs) // need these to not trigger not-found errors
+	FieldArgNames(cfg, allArgs)
+	nonFlags, err = ParseArgs(cfg, args, allArgs, true)
 	if err != nil {
 		fmt.Println(Usage(cfg))
 	}
 	return
 }
 
-// parseArgs does the actual arg parsing
-func parseArgs(cfg any, args []string) ([]string, error) {
-	allArgs := FieldArgNames(cfg)
-	var leftovers []string
+// ParseArgs parses the given args using the map of all available args,
+// setting the values accordingly, and returning any leftover (non-flag) args.
+// Setting errNotFound = true causes args that are not in allArgs to
+// trigger an error.  Otherwise, those args are just skipped.
+func ParseArgs(cfg any, args []string, allArgs map[string]reflect.Value, errNotFound bool) ([]string, error) {
+	var nonFlags []string
 	var err error
 	for len(args) > 0 {
 		s := args[0]
 		args = args[1:]
 		if len(s) == 0 || s[0] != '-' || len(s) == 1 {
-			leftovers = append(leftovers, s)
+			nonFlags = append(nonFlags, s)
 			continue
 		}
 
 		if s[1] == '-' && len(s) == 2 { // "--" terminates the flags
 			// f.argsLenAtDash = len(f.args)
-			leftovers = append(leftovers, args...)
+			nonFlags = append(nonFlags, args...)
 			break
 		}
-		args, err = parseArg(s, args, allArgs)
+		args, err = ParseArg(s, args, allArgs, errNotFound)
 		if err != nil {
-			return leftovers, err
+			return nonFlags, err
 		}
 	}
-	return leftovers, nil
+	return nonFlags, nil
 }
 
-func parseArg(s string, args []string, allArgs map[string]reflect.Value) (a []string, err error) {
+func ParseArg(s string, args []string, allArgs map[string]reflect.Value, errNotFound bool) (a []string, err error) {
 	a = args
 	name := s[1:]
 	if name[0] == '-' {
 		name = name[1:]
 	}
 	if len(name) == 0 || name[0] == '-' || name[0] == '=' {
-		err = fmt.Errorf("SetFromArgs: bad flag syntax: %s", s)
+		err = fmt.Errorf("econfig.ParseArgs: bad flag syntax: %s", s)
 		log.Println(err)
 		return
 	}
@@ -75,8 +80,10 @@ func parseArg(s string, args []string, allArgs map[string]reflect.Value) (a []st
 	name = split[0]
 	fval, exists := allArgs[name]
 	if !exists {
-		err = fmt.Errorf("SetFromArgs: flag name not recognized: %s", name)
-		log.Println(err)
+		if errNotFound {
+			err = fmt.Errorf("econfig.ParseArgs: flag name not recognized: %s", name)
+			log.Println(err)
+		}
 		return
 	}
 
@@ -111,21 +118,22 @@ func parseArg(s string, args []string, allArgs map[string]reflect.Value) (a []st
 		a = a[1:]
 	default:
 		// '--flag' (arg was required)
-		err = fmt.Errorf("SetFromArgs: flag needs an argument: %s", s)
+		err = fmt.Errorf("econfig.ParseArgs: flag needs an argument: %s", s)
 		log.Println(err)
 		return
 	}
 
-	err = setArgValue(name, fval, value)
+	err = SetArgValue(name, fval, value)
 	return
 }
 
-func setArgValue(name string, fval reflect.Value, value string) error {
+// SetArgValue sets the given arg name to the given value, into the given settable reflect.Value
+func SetArgValue(name string, fval reflect.Value, value string) error {
 	nptyp := kit.NonPtrType(fval.Type())
 	vk := nptyp.Kind()
 	switch {
 	case vk >= reflect.Int && vk <= reflect.Uint64 && kit.Enums.TypeRegistered(nptyp):
-		return kit.SetEnumValueFromString(fval, value)
+		return kit.Enums.SetAnyEnumValueFromString(fval, value)
 	case vk == reflect.Map:
 		mval := make(map[string]any)
 		err := ReadBytes(&mval, []byte("tmp="+value)) // use toml decoder
@@ -135,27 +143,39 @@ func setArgValue(name string, fval reflect.Value, value string) error {
 		}
 		ok := kit.SetRobust(fval.Interface(), mval["tmp"])
 		if !ok {
-			err := fmt.Errorf("SetFromArgs: not able to set field from arg: %s val: %s", name, value)
+			err := fmt.Errorf("econfig.ParseArgs: not able to set field from arg: %s val: %s", name, value)
+			log.Println(err)
+			return err
+		}
+	case vk == reflect.Slice:
+		mval := make(map[string]any)
+		err := ReadBytes(&mval, []byte("tmp="+value)) // use toml decoder
+		if err != nil {
+			log.Println(err)
+			return err
+		}
+		err = kit.CopySliceRobust(fval, reflect.ValueOf(mval["tmp"]))
+		if err != nil {
+			log.Println(err)
+			err = fmt.Errorf("econfig.ParseArgs: not able to set field from arg: %s val: %s", name, value)
 			log.Println(err)
 			return err
 		}
 	default:
 		ok := kit.SetRobust(fval.Interface(), value) // overkill but whatever
 		if !ok {
-			err := fmt.Errorf("SetFromArgs: not able to set field from arg: %s val: %s", name, value)
+			err := fmt.Errorf("econfig.ParseArgs: not able to set field from arg: %s val: %s", name, value)
 			log.Println(err)
 			return err
 		}
 	}
 	return nil
 }
 
-// FieldArgNames returns map of all the different ways the field names
+// FieldArgNames adds to given args map all the different ways the field names
 // can be specified as arg flags, mapping to the reflect.Value
-func FieldArgNames(obj any) map[string]reflect.Value {
-	allArgs := make(map[string]reflect.Value)
+func FieldArgNames(obj any, allArgs map[string]reflect.Value) {
 	fieldArgNamesStruct(obj, "", allArgs)
-	return allArgs
 }
 
 func addAllCases(nm, path string, pval reflect.Value, allArgs map[string]reflect.Value) {
@@ -195,3 +215,12 @@ func fieldArgNamesStruct(obj any, path string, allArgs map[string]reflect.Value)
 		}
 	}
 }
+
+// CommandArgs adds non-field args that control the config process:
+// -config -cfg -help -h
+func CommandArgs(allArgs map[string]reflect.Value) {
+	allArgs["config"] = reflect.ValueOf(&ConfigFile)
+	allArgs["cfg"] = reflect.ValueOf(&ConfigFile)
+	allArgs["help"] = reflect.ValueOf(&Help)
+	allArgs["h"] = reflect.ValueOf(&Help)
+}
diff --git a/econfig/config.go b/econfig/config.go
@@ -5,9 +5,9 @@
 package econfig
 
 import (
-	"flag"
 	"fmt"
 	"os"
+	"reflect"
 )
 
 var (
@@ -20,47 +20,56 @@ var (
 	// specified in Include field or via the command line --config or --cfg args.
 	// Set this prior to calling Config -- default is just current directory '.'
 	IncludePaths = []string{"."}
+
+	// NonFlagArgs are the command-line args that remain after all the flags have
+	// been processed.  This is set by the call to Config.
+	NonFlagArgs = []string{}
+
+	// ConfigFile is the name of the config file actually loaded, specified by the
+	// -config or -cfg command-line arg or the default file given in Config
+	ConfigFile string
+
+	// Help is the variable target for the -help or -h args
+	Help bool
 )
 
 // Config is the overall config setting function, processing config files
 // and command-line arguments, in the following order:
 //   - Apply any `def:` field tag default values.
 //   - Look for `--config` or `--cfg` arg, specifying a config file on the command line.
 //   - Fall back on default config file name passed to `Config` function, if arg not found.
-//   - Read any `Include[s]` files in config file in deepest-first (natural) order, then the specified config file last.
-//   - Process command-line args based on Config field names, with `.` separator for sub-fields (see field tags for shorthand and aliases)
+//   - Read any `Include[s]` files in config file in deepest-first (natural) order,
+//     then the specified config file last.
+//   - Process command-line args based on Config field names, with `.` separator
+//     for sub-fields (see field tags for shorthand and aliases)
+//
+// Also processes -help or -h and prints usage and quits immediately.
 func Config(cfg any, defaultFile string) ([]string, error) {
 	var errs []error
 	err := SetFromDefaults(cfg)
 	if err != nil {
 		errs = append(errs, err)
 	}
 
-	helpArg := flag.Bool("help", false, "show available command-line arguments and exit")
-	hArg := flag.Bool("h", false, "show available command-line arguments and exit")
-	configArg := flag.String("config", "", "filename / path for loading Config settings")
-	cfgArg := flag.String("cfg", "", "filename / path for loading Config settings")
-	flag.Parse()
+	allArgs := make(map[string]reflect.Value)
+	CommandArgs(allArgs)
 
-	if *helpArg || *hArg {
-		flag.PrintDefaults()
-		fmt.Println("")
+	args := os.Args[1:]
+	_, err = ParseArgs(cfg, args, allArgs, false) // false = ignore non-matches
+
+	if Help {
 		fmt.Println(Usage(cfg))
 		os.Exit(0)
 	}
 
-	file := defaultFile
-	if *configArg != "" {
-		file = *configArg
-	} else if *cfgArg != "" {
-		file = *cfgArg
+	if ConfigFile == "" {
+		ConfigFile = defaultFile
 	}
-
-	err = OpenWithIncludes(cfg, file)
+	err = OpenWithIncludes(cfg, ConfigFile)
 	if err != nil {
 		errs = append(errs, err)
 	}
-	args, err := SetFromArgs(cfg, flag.Args())
+	NonFlagArgs, err = SetFromArgs(cfg, args)
 	if err != nil {
 		errs = append(errs, err)
 	}