diff --git a/NuGet.config b/NuGet.config index f924a25..83bc738 100644 --- a/NuGet.config +++ b/NuGet.config @@ -1,4 +1,4 @@ - + @@ -11,6 +11,7 @@ + \ No newline at end of file diff --git a/notebooks/01_Introduction.ipynb b/notebooks/01_Introduction.ipynb index 9556fd8..84a5f33 100644 --- a/notebooks/01_Introduction.ipynb +++ b/notebooks/01_Introduction.ipynb @@ -38,7 +38,7 @@ "outputs": [], "source": [ "#r \"nuget:DwC-A_dotnet,0.6.0\"\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.8-Pre\"" + "#r \"nuget:DwC-A_dotnet.Interactive,0.1.9-Pre\"" ] }, { diff --git a/notebooks/02_Formatting.ipynb b/notebooks/02_Formatting.ipynb index c03525d..8702989 100644 --- a/notebooks/02_Formatting.ipynb +++ b/notebooks/02_Formatting.ipynb @@ -19,11 +19,11 @@ "language": "csharp" } }, + "outputs": [], "source": [ "#r \"nuget:DwC-A_dotnet,0.6.0\"\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.8-Pre\"" - ], - "outputs": [] + "#r \"nuget:DwC-A_dotnet.Interactive,0.1.9-Pre\"" + ] }, { "cell_type": "markdown", @@ -44,6 +44,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using DwC_A;\n", "using System.IO.Compression;\n", @@ -55,8 +56,7 @@ "ZipFile.ExtractToDirectory(\"./data/Papilionidae.zip\", outputPath);\n", "var archive = new ArchiveReader(outputPath);\n", "archive.MetaData" - ], - "outputs": [] + ] }, { "cell_type": "code", @@ -66,13 +66,13 @@ "language": "csharp" } }, + "outputs": [], "source": [ "var occurrence = archive.CoreFile;\n", "var multimedia = archive.Extensions.GetFileReaderByFileName(\"multimedia.txt\");\n", "display(occurrence);\n", "display(multimedia);" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -93,6 +93,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using Microsoft.DotNet.Interactive.Formatting;\n", "\n", @@ -104,8 +105,7 @@ " var output = div[style: \"font-style: italic\"]($\"{scientificName.genus} {scientificName.species}\");\n", " writer.Write(output);\n", "}, \"text/html\" );" - ], - "outputs": [] + ] }, { "cell_type": 
"markdown", @@ -124,6 +124,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using DwC_A.Terms;\n", "\n", @@ -132,8 +133,7 @@ " date = n[Terms.eventDate],\n", " scientificName = new ScientificName(n[Terms.genus], n[Terms.specificEpithet])\n", " })" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -152,6 +152,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "public record Link(string url);\n", "\n", @@ -166,8 +167,7 @@ " scientificName = new ScientificName(n[Terms.genus], n[Terms.specificEpithet]),\n", " link = new Link(n[Terms.references])\n", " })" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -188,6 +188,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "public record ImageUrl(string url, string caption = \"\");\n", "\n", @@ -198,8 +199,7 @@ " );\n", " writer.Write(fig);\n", "}, \"text/html\");" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -216,6 +216,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using DwC_A.Terms;\n", "\n", @@ -231,8 +232,7 @@ "}; \n", "\n", "media" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -251,6 +251,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "Formatter.Register>((images, writer) =>{\n", " var figs = new List();\n", @@ -261,8 +262,7 @@ "}, \"text/html\");\n", "\n", "media.Select(n => n.image)" - ], - "outputs": [] + ] } ], "metadata": { @@ -281,4 +281,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/03_DataFrame.ipynb b/notebooks/03_DataFrame.ipynb index 2691ee1..3740c65 100644 --- a/notebooks/03_DataFrame.ipynb +++ b/notebooks/03_DataFrame.ipynb @@ -19,14 +19,14 @@ "language": "csharp" } }, + "outputs": [], "source": [ "#r \"nuget: XPlot.Plotly, 4.0.6\"\n", "#r \"nuget: XPlot.Plotly.Interactive, 4.0.6\"\n", "#r \"nuget: Microsoft.Data.Analysis, 0.19.0\"\n", "#r \"nuget: DwC-A_dotnet, 0.6.0\"\n", - "#r \"nuget: DwC-a_dotnet.Interactive, 0.1.8-Pre\"" - ], - "outputs": [] + "#r 
\"nuget: DwC-a_dotnet.Interactive, 0.1.9-Pre\"" + ] }, { "cell_type": "markdown", @@ -45,6 +45,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using Microsoft.AspNetCore.Html;\n", "using Microsoft.DotNet.Interactive.Formatting;\n", @@ -79,8 +80,7 @@ "\n", " writer.Write(t);\n", "}, \"text/html\");" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -99,6 +99,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using DwC_A;\n", "using DwC_A.Terms;\n", @@ -121,8 +122,7 @@ "\n", "var eventID = eventFile.DataRows.Skip(2).Select(row => row[Terms.eventID]).First();\n", "display(eventID);" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -141,6 +141,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using Microsoft.Data.Analysis;\n", "\n", @@ -158,8 +159,7 @@ "\n", "var df = new DataFrame(species, counts);\n", "df.Info()" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -180,12 +180,12 @@ "language": "csharp" } }, + "outputs": [], "source": [ "var sum = (int)counts.Sum();\n", "double D = 1 - (double)(int)(counts.Apply(n => n * (n - 1)).Sum()) / (sum * (sum - 1));\n", "display($\"D = {D:0.00}\")" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -204,6 +204,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using XPlot.Plotly;\n", "\n", @@ -213,8 +214,7 @@ "var chart = Chart.Column(kvp);\n", "chart.WithTitle($\"Event: {eventID}\");\n", "display(chart)" - ], - "outputs": [] + ] } ], "metadata": { @@ -233,4 +233,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/04_FSharp.ipynb b/notebooks/04_FSharp.ipynb index 41926f0..4794671 100644 --- a/notebooks/04_FSharp.ipynb +++ b/notebooks/04_FSharp.ipynb @@ -21,14 +21,14 @@ "language": "fsharp" } }, + "outputs": [], "source": [ "#r \"nuget:Plotly.NET,2.0.0-preview.15\"\n", "#r \"nuget:Plotly.NET.Interactive,2.0.0-preview.15\"\n", "#r \"nuget:FSharp.Data,4.2.5\"\n", "#r \"nuget:DwC-A_dotnet,0.6.0\"\n", - "#r 
\"nuget:DwC-A_dotnet.Interactive,0.1.8-Pre\"" - ], - "outputs": [] + "#r \"nuget:DwC-A_dotnet.Interactive,0.1.9-Pre\"" + ] }, { "cell_type": "code", @@ -38,6 +38,7 @@ "language": "fsharp" } }, + "outputs": [], "source": [ "open DwC_A\n", "open DwC_A.Terms\n", @@ -55,8 +56,7 @@ "\n", "let occurrence = archive.CoreFile;\n", "occurrence" - ], - "outputs": [] + ] }, { "cell_type": "code", @@ -66,6 +66,7 @@ "language": "fsharp" } }, + "outputs": [], "source": [ "open System.Linq\n", "open Plotly.NET\n", @@ -99,8 +100,7 @@ " |> Chart.withTitle(title = \"Papilionidae of Texas\")\n", "\n", "map " - ], - "outputs": [] + ] } ], "metadata": { @@ -119,4 +119,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/05_Eml.ipynb b/notebooks/05_Eml.ipynb index 917d295..6471ff5 100644 --- a/notebooks/05_Eml.ipynb +++ b/notebooks/05_Eml.ipynb @@ -17,11 +17,11 @@ "language": "csharp" } }, + "outputs": [], "source": [ "#r \"nuget:DwC-A_dotnet,0.6.0\"\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.8-Pre\"" - ], - "outputs": [] + "#r \"nuget:DwC-A_dotnet.Interactive,0.1.9-Pre\"" + ] }, { "cell_type": "code", @@ -31,6 +31,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using DwC_A;\n", "using System.IO;\n", @@ -38,8 +39,7 @@ "var archive = new ArchiveReader(\"./data/dwca-rooftop-v1.4.zip\");\n", "var emlPath = Path.Combine(archive.OutputPath, archive.MetaData.Metadata);\n", "emlPath" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -60,6 +60,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using System.Xml;\n", "using System.Xml.XPath;\n", @@ -72,8 +73,7 @@ "\n", "var title = root.SelectSingleNode(\"dataset/title\", nsmgr).ToString();\n", "title" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -97,6 +97,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using Microsoft.DotNet.Interactive.Formatting;\n", "using System.Xml.XPath;\n", @@ -116,8 +117,7 @@ " }\n", " 
writer.Write(div(outputList));\n", "}, \"text/html\");" - ], - "outputs": [] + ] }, { "cell_type": "code", @@ -127,10 +127,10 @@ "language": "csharp" } }, + "outputs": [], "source": [ "root.Select(\"dataset/title\", nsmgr)" - ], - "outputs": [] + ] }, { "cell_type": "markdown", @@ -149,6 +149,7 @@ "language": "csharp" } }, + "outputs": [], "source": [ "using System.Xml.Xsl;\n", "using System.Xml;\n", @@ -164,8 +165,7 @@ " xslt.Transform(emlPath, writer);\n", " display(new HtmlString(stringWriter.ToString()));\n", "}" - ], - "outputs": [] + ] } ], "metadata": { @@ -184,4 +184,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/06_DataMapping.ipynb b/notebooks/06_DataMapping.ipynb index dad655b..c20cc90 100644 --- a/notebooks/06_DataMapping.ipynb +++ b/notebooks/06_DataMapping.ipynb @@ -4,16 +4,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Data Conversion and Mapping\n", + "# Mapping\n", "\n", - "File data fields can be converted to strongly typed objects using one of the Convert extension methods of the IRow interface. Documentation can be found [here](https://github.com/pjoiner/DwC-A_dotnet/wiki/Type-Conversion).\n", + "Using the [DwC-A_dotnet.Mapping](https://www.nuget.org/packages/DwC-A_dotnet.Mapping/) library we can map data from a [DwC-A_dotnet](https://www.nuget.org/packages/DwC-A_dotnet/) IRow to a strongly typed class. There are two different approaches to mapping data\n", "\n", - "We'll utilize the [DwC-A_dotnet.Mapping](https://github.com/pjoiner/DwC-A_dotnet.Mapping) extension library to map IRow fields to a model class." + "1. Using the dwca-codegen magic command to generate a class and mapping method from archive metadata. This requires the least code and is more interactive but can run into more issues.\n", + "1. Manually create a class definition and mapping method. This gives the most control over mapping but requires more effort. 
" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -21,23 +22,33 @@ }, "outputs": [], "source": [ - "#r \"nuget:DwC-A_dotnet,0.6.0\"\n", - "#r \"nuget:DwC-A_dotnet.Mapping,0.6.1\"\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.8-Pre\"" + "#r \"nuget:DwC-A_dotnet.Interactive,0.1.9-Pre\"\n", + "#r \"nuget:DwC-A_dotnet.Mapping,0.6.3\"\n", + "#r \"nuget:Microsoft.ML\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Model Class\n", + "## Using The dwca-codegen Magic Command\n", "\n", - "First create the class that you wish to map values to. In this instance we are going to create an Occurrence class to display the scientific name and coordinates of the observation." + "First, we will map using the dwca-codegen magic command.\n", + "\n", + "### Create Configuration\n", + "\n", + "This step is optional but gives more control over how classes are generated and mapped. If this step is left out all properties will be mapped as string type.\n", + "\n", + "Use the GeneratorConfigurationBuilder to create a configuration to influence how the dwca-codegen command generates the class to be mapped for each file in the archive.\n", + "\n", + "Use the AddProperty method to define the properties that will be added to the generated classes and how they will be mapped to specific terms. Use the wildcard term __*__ to determine whether properties will be created for all other undefined terms and mapped.\n", + "\n", + "The WithMapMethod creates a static method on the type called MapRow that will be used to map an IRow row to an instance of the generated class."
] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -45,28 +56,32 @@ }, "outputs": [], "source": [ - "using DwC_A;\n", + "using DwC_A.Interactive.Mapping;\n", + "using DwC_A.Terms;\n", "\n", - "var outputPath = \"./data/Papilionidae.zip\";\n", - "var archive = new ArchiveReader(outputPath);\n", + "var config = new GeneratorConfigurationBuilder()\n", + ".AddProperty(\"*\", \"string\", true)\n", + ".AddProperty(Terms.decimalLatitude, \"double\", true, \"Latitude\")\n", + ".AddProperty(Terms.decimalLongitude, \"double\", true, \"Longitude\")\n", + ".AddProperty(Terms.dateIdentified, \"DateTime\")\n", + ".WithMapMethod(true)\n", + ".Build();\n", "\n", - "archive" + "config" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Class Definition\n", - "\n", - "The next cell loads a class definition to map the data into. This code can be created by hand or generated using the dotnet tool [dwca-codegen](https://www.nuget.org/packages/dwca-codegen/) which is available on NuGet. For more information on using [dwca-codegen](https://github.com/pjoiner/DwC-A_dotnet.Mapping/tree/master/src/dwca-codegen) see the [README.md](https://github.com/pjoiner/DwC-A_dotnet.Mapping/blob/master/src/dwca-codegen/README.md) file for that package / repo.\n", + "### dwca-codegen\n", "\n", - "**Note:** Make sure you set the namespace to \"\" when generating class files from an archive for use in dotnet interactive since namespaces aren't allowed." + "Use the dwca-codegen command to examine the archive and generate classes to map data into. The --configName option can be used to specify the name of the variable that contains the configuration information we created earlier. 
" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -74,21 +89,22 @@ }, "outputs": [], "source": [ - "#load \"./Code/Occurrence.cs\"" + "#!dwca-codegen -h\n", + "#!dwca-codegen -c config ./data/Papilionidae.zip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Mapper\n", + "## Create Mapper\n", "\n", - "Next we create a mapper and define a method for mapping the fields of the IRow to the class." + "Now that we have class definitions and a mapping method we can define a mapper as follows." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -97,30 +113,76 @@ "outputs": [], "source": [ "using DwC_A.Mapping;\n", - "using DwC_A.Terms;\n", - "using DwC_A.Extensions;\n", - "\n", - "var mapper = MapperFactory.CreateMapper((o, row) => {\n", - " o.GbifID = row[\"http://rs.gbif.org/terms/1.0/gbifID\"];\n", - " o.Identifier = row[\"http://purl.org/dc/terms/identifier\"];\n", - " o.ScientificName = row[Terms.scientificName];\n", - " o.DecimalLongitude = row.Convert(Terms.decimalLongitude);\n", - " o.DecimalLatitude = row.Convert(Terms.decimalLatitude);\n", - "});" + "\n", + "var mapper = MapperFactory.CreateMapper(Occurrence.MapRow);\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Map Archive\n", + "\n", + "Finally, we can open the archive and query rows that we can map using the mapper.\n", + "\n", + "**Hint:** Use the Greedy RowStrategy for better performance when mapping the entire class.\n", + "\n", + "There are three different Map extensions for the IFileReader and IRow interfaces returned by the ArchiveReader." 
+ ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + } + }, + "outputs": [], + "source": [ + "using DwC_A;\n", + "using DwC_A.Factories;\n", + "using DwC_A.Config;\n", + "\n", + "var factory = new DefaultFactory((cfg) => {\n", + " cfg.Add(c => c.Strategy = RowStrategy.Greedy);\n", + "});\n", + "\n", + "var archive = new ArchiveReader(\"./data/Papilionidae.zip\", factory);\n", + "\n", + "var occurrences = archive.CoreFile\n", + " .DataRows\n", + " .Where(row => row[Terms.decimalLatitude] != null)\n", + " .Where(row => row[Terms.dateIdentified] != null)\n", + " .Map(mapper);\n", + "\n", + "occurrences.Select(o => new {\n", + " o.ScientificName,\n", + " o.Latitude,\n", + " o.Longitude,\n", + " o.DateIdentified\n", + "})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Map\n", + "## Manual Mapping\n", "\n", - "Finally, we map the data using the Map methods of either the IRow or IFileReader interfaces." + "If you already have a class definition or want to create the class definition by hand then use this method.\n", + "\n", + "Classes may be defined in two ways.\n", + "\n", + "1. Defined directly in a cell.\n", + "1. Loaded from a file on disk using the #load magic command.\n", + "\n", + "In this instance we'll load the class definition from a file. After that we can define a mapper and map method."
] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "dotnet_interactive": { "language": "csharp" @@ -128,10 +190,50 @@ }, "outputs": [], "source": [ - "archive.CoreFile\n", - " .Map(mapper)\n", - " .Take(100)\n", - " .Select(n => new{n.GbifID, n.Identifier, n.ScientificName, n.DecimalLatitude, n.DecimalLongitude})" + "#load \"./Code/Multimedia.cs\"\n", + "\n", + "using System;\n", + "\n", + "var multimediaMapper = MapperFactory.CreateMapper((m, row) => {\n", + " m.GbifID = row.Convert(\"http://rs.gbif.org/terms/1.0/gbifID\");\n", + " m.Type = row[\"http://purl.org/dc/terms/type\"];\n", + " m.Identifier = row[\"http://purl.org/dc/terms/identifier\"];\n", + " m.Created = row.Convert(\"http://purl.org/dc/terms/created\");\n", + "});\n", + "\n", + "archive.Extensions\n", + " .GetFileReadersByRowType(\"http://rs.gbif.org/terms/1.0/Multimedia\")\n", + " .FirstOrDefault()?\n", + " .Map(multimediaMapper)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using With Microsoft.ML\n", + "\n", + "Now that we have mapped an IEnumerable of Occurrences we can load the data into an IDataView or DataFrame using MLContext from [Microsoft.ML](https://www.nuget.org/packages/Microsoft.ML/)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + } + }, + "outputs": [], + "source": [ + "using Microsoft.ML;\n", + "using Microsoft.ML.Data;\n", + "\n", + "var mlContext = new MLContext();\n", + "\n", + "var data = mlContext.Data.LoadFromEnumerable(occurrences);\n", + "\n", + "data.Schema" ] } ], diff --git a/notebooks/Code/Multimedia.cs b/notebooks/Code/Multimedia.cs index ad172e1..c2dbb87 100644 --- a/notebooks/Code/Multimedia.cs +++ b/notebooks/Code/Multimedia.cs @@ -1,18 +1,10 @@ -public partial class Multimedia +using System; + +public partial class Multimedia { - public string GbifID { get; set; } + public long GbifID { get; set; } public string Type { get; set; } - public string Format { get; set; } public string Identifier { get; set; } - public string References { get; set; } - public string Title { get; set; } - public string Description { get; set; } - public string Source { get; set; } - public string Audience { get; set; } - public string Created { get; set; } + public DateTime Created { get; set; } public string Creator { get; set; } - public string Contributor { get; set; } - public string Publisher { get; set; } - public string License { get; set; } - public string RightsHolder { get; set; } } \ No newline at end of file diff --git a/src/DwC-A_dotnet.Interactive/Commands/DwcaCodegenCommand.cs b/src/DwC-A_dotnet.Interactive/Commands/DwcaCodegenCommand.cs new file mode 100644 index 0000000..4ea1f71 --- /dev/null +++ b/src/DwC-A_dotnet.Interactive/Commands/DwcaCodegenCommand.cs @@ -0,0 +1,82 @@ +extern alias Core; + +using Core::DwC_A; +using DwC_A.Config; +using DwC_A.Generator; +using DwC_A.Interactive.Mapping; +using Microsoft.DotNet.Interactive; +using Microsoft.DotNet.Interactive.Events; +using Microsoft.DotNet.Interactive.ValueSharing; +using System; +using System.CommandLine; +using System.CommandLine.Invocation; +using System.IO; +using System.Threading.Tasks; + 
+namespace DwC_A.Interactive.Commands +{ + internal class DwcaCodegenCommand : Command + { + public DwcaCodegenCommand() + : base("#!dwca-codegen", "Generate strongly typed class files for Darwin Core Archive") + { + AddArgument(new Argument() + { + Name = "archivePath", + Description = "Path to archive folder or zip file" + }); + + AddOption(new Option( + aliases: new[] {"-c", "--configName"}, + description: "Name of configuration variable", + getDefaultValue: () => "" + )); + + Handler = CommandHandler.Create((Func)(async (context, archivePath, configName) => + { + var archive = new ArchiveReader(archivePath); + + var csharpKernel = (ISupportGetValue)context.HandlingKernel.FindKernel("csharp"); + if (!csharpKernel.TryGetValue(configName, out IGeneratorConfiguration config)) + { + config = new GeneratorConfigurationBuilder().Build(); + } + context.Display($"Opening archive {archive.FileName} using configuration", new[] { "text/html" }); + context.Display(config, new[] { "text/html" }); + + await GenerateClass(context, archive.CoreFile, config); + foreach(var extension in archive.Extensions.GetFileReaders()) + { + await GenerateClass(context, extension, config); + } + })); + } + + private static async Task GenerateClass(KernelInvocationContext context, + IFileReader fileReader, + IGeneratorConfiguration config) + { + var classGenerator = new ClassGenerator(); + var className = Path.GetFileNameWithoutExtension(fileReader.FileName); + className = char.ToUpper(className[0]) + className.Substring(1); + context.Display($"Generating class {className}", new[] { "text/html" }); + var source = classGenerator.GenerateFile(fileReader.FileMetaData, config); + var result = await context.HandlingKernel.SubmitCodeAsync(source); + result.KernelEvents.Subscribe((ev) => { }, (ex) => + { + context.Display(ex.Message, new[] { "text/plain" }); + }); + result.KernelEvents.Subscribe((ev) => + { + if(ev is ErrorProduced error) + { + context.Fail(context.Command, null, error.Message); + } 
+ if (ev is CommandFailed failure) + { + context.Fail(context.Command, null, failure.Message); + } + }); + } + } +} diff --git a/src/DwC-A_dotnet.Interactive/Commands/TermsCommand.cs b/src/DwC-A_dotnet.Interactive/Commands/TermsCommand.cs new file mode 100644 index 0000000..aa18dab --- /dev/null +++ b/src/DwC-A_dotnet.Interactive/Commands/TermsCommand.cs @@ -0,0 +1,19 @@ +using Microsoft.DotNet.Interactive; +using System; +using System.CommandLine; +using System.CommandLine.Invocation; + +namespace DwC_A.Interactive.Commands +{ + internal class TermsCommand : Command + { + public TermsCommand() : base("#!terms", "Display Darwin Core standard terms") + { + Handler = CommandHandler.Create((KernelInvocationContext invocationContext) => + { + var defaultTerms = new DefaultTerms(); + invocationContext.Display(defaultTerms); + }); + } + } +} diff --git a/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj b/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj index beb8f1e..e12aba7 100644 --- a/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj +++ b/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj @@ -11,10 +11,10 @@ DwC-A darwin-core Biodiversity Paul Joiner Paul Joiner - 0.1.8-Pre + 0.1.9-Pre .NET Interactive Extensions for Darwin Core Archive file reader - 0.1.8.0 - 0.1.8.0 + 0.1.9.0 + 0.1.9.0 LICENSE README.md @@ -23,8 +23,9 @@ Core + - + diff --git a/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs b/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs index 5ba5916..e844854 100644 --- a/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs +++ b/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs @@ -8,6 +8,8 @@ using System.CommandLine; using System.CommandLine.Invocation; using System.Collections.Generic; +using DwC_A.Interactive.Commands; +using DwC_A.Config; namespace DwC_A.Interactive { @@ -20,17 +22,10 @@ public Task OnLoadAsync(Kernel kernel) Formatter.Register(FileReaderMetaData.Register, "text/html"); 
Formatter.Register(TermsFormatter.Register, "text/html"); Formatter.Register>(RowFormatter.Register, "text/html"); + Formatter.Register(GeneratorConfigFormatter.Register, "text/html"); - var termsCommand = new Command("#!terms", "Display Darwin Core standard terms") - { - Handler = CommandHandler.Create((KernelInvocationContext invocationContext) => - { - var defaultTerms = new DefaultTerms(); - invocationContext.Display(defaultTerms); - }) - }; - - kernel.AddDirective(termsCommand); + kernel.AddDirective(new TermsCommand()); + kernel.AddDirective(new DwcaCodegenCommand()); return Task.CompletedTask; } diff --git a/src/DwC-A_dotnet.Interactive/Formatters/GeneratorConfigFormatter.cs b/src/DwC-A_dotnet.Interactive/Formatters/GeneratorConfigFormatter.cs new file mode 100644 index 0000000..c006beb --- /dev/null +++ b/src/DwC-A_dotnet.Interactive/Formatters/GeneratorConfigFormatter.cs @@ -0,0 +1,86 @@ +using DwC_A.Config; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags; + +namespace DwC_A.Interactive.Formatters +{ + internal class GeneratorConfigFormatter + { + public static void Register(IGeneratorConfiguration config, TextWriter writer) + { + var mainHeader = thead(tr( + th("Option"), + th("Value") + )); + var mainBody = MainRows(config); + var mainTable = table( + mainHeader, + mainBody + ); + + var propertyHeader = thead(tr( + th("Name"), + th("Type"), + th("Include"), + th("Term") + )); + var propertyRows = new List(); + foreach(var property in config.Properties) + { + propertyRows.Add(tr(new[] + { + td(property.Value.PropertyName), + td(property.Value.TypeName), + td(property.Value.Include), + td(property.Key) + })); + } + var propertyBody = tbody(propertyRows); + var propertyTable = table( + propertyHeader, + propertyBody + ); + + writer.Write( + div( + div(h3("Options")), + div(mainTable), + div(h3("Properties")), + div(propertyTable) + ) + ); + } + + private static 
dynamic MainRows(IGeneratorConfiguration config) + { + return tbody(new[] + { + MainRow(nameof(config.Namespace), config.Namespace), + MainRow(nameof(config.MapMethod), config.MapMethod), + MainRow(nameof(config.Output), config.Output), + MainRow(nameof(config.PascalCase), config.PascalCase), + MainRow(nameof(config.TermAttribute), config.TermAttribute), + AddUsings(config) + }); + } + + private static dynamic MainRow(string configName, object configValue) + { + return tr( + td(configName), + td(configValue.ToString()) + ); + } + + private static dynamic AddUsings(IGeneratorConfiguration config) + { + return tr( + td("Usings"), + td(config.Usings.Select(u => ul(u))) + ); + } + } +} diff --git a/src/DwC-A_dotnet.Interactive/Mapping/GeneratorConfigurationBuilder.cs b/src/DwC-A_dotnet.Interactive/Mapping/GeneratorConfigurationBuilder.cs new file mode 100644 index 0000000..158cc61 --- /dev/null +++ b/src/DwC-A_dotnet.Interactive/Mapping/GeneratorConfigurationBuilder.cs @@ -0,0 +1,108 @@ +using DwC_A.Config; +using System.Collections.Generic; +using System.Linq; + +namespace DwC_A.Interactive.Mapping +{ + public class GeneratorConfigurationBuilder + { + private const string ExtensionNamespace = "DwC_A.Extensions"; + private const string SystemNamespace = "System"; + private const string CoreNamespace = "DwC_A"; + private const string AllTerms = "*"; + + internal class GeneratorConfiguration : IGeneratorConfiguration + { + private Dictionary properties = new Dictionary() + { + { AllTerms, new PropertyConfiguration() } + }; + private HashSet usings = new HashSet(new[] { SystemNamespace }); + public string Namespace { get; set; } = ""; + public string Output { get; set; } = ""; + public bool PascalCase { get; set; } = true; + public bool MapMethod { get; set; } = true; + public IDictionary Properties => properties; + public TermAttributeType TermAttribute { get; set; } = TermAttributeType.none; + public IList Usings => usings.ToList(); + public PropertyConfiguration 
GetPropertyConfiguration(string term) + { + return Properties.ContainsKey(term) ? + Properties[term] : + Properties[AllTerms]; + } + internal void AddUsing(string namespaceName) + { + usings.Add(namespaceName); + } + internal void AddProperty(string term, string typeName, bool include = true, string propertyName = null) + { + if (properties.ContainsKey(term)) + { + properties.Remove(term); + } + properties.Add(term, new PropertyConfiguration() + { + TypeName = typeName, + PropertyName = propertyName, + Include = include + }); + } + } + + private GeneratorConfiguration config = new GeneratorConfiguration(); + + public GeneratorConfigurationBuilder WithNamespace(string namespaceName) + { + config.Namespace = namespaceName; + return this; + } + + public GeneratorConfigurationBuilder WithPascalCase(bool pascalCase) + { + config.PascalCase = pascalCase; + return this; + } + + public GeneratorConfigurationBuilder WithTermAttribute(TermAttributeType termAttribute) + { + config.TermAttribute = termAttribute; + return this; + } + + public GeneratorConfigurationBuilder AddUsing(string usingNamespace) + { + config.AddUsing(usingNamespace); + return this; + } + + public GeneratorConfigurationBuilder WithOutput(string output) + { + config.Output = output; + return this; + } + + public GeneratorConfigurationBuilder WithMapMethod(bool mapMethod) + { + config.MapMethod = mapMethod; + if(mapMethod) + { + config.AddUsing(CoreNamespace); + config.AddUsing(ExtensionNamespace); + } + return this; + } + + public GeneratorConfigurationBuilder AddProperty(string term, string typeName, bool include = true, string propertyName = null) + { + config.AddProperty(term, typeName, include, propertyName); + return this; + } + + public IGeneratorConfiguration Build() + { + return config; + } + + } +} diff --git a/src/UnitTests/GeneratorConfigurationFormatterTests.cs b/src/UnitTests/GeneratorConfigurationFormatterTests.cs new file mode 100644 index 0000000..b08c805 --- /dev/null +++ 
b/src/UnitTests/GeneratorConfigurationFormatterTests.cs @@ -0,0 +1,25 @@ +using DwC_A.Interactive.Formatters; +using DwC_A.Interactive.Mapping; +using DwC_A.Terms; +using System.IO; +using Xunit; + +namespace UnitTests +{ + public class GeneratorConfigurationFormatterTests + { + [Fact] + public void ShouldFormatHeader() + { + var config = new GeneratorConfigurationBuilder() + .AddProperty(Terms.decimalLatitude, "double", true, "Latitude") + .Build(); + using var tw = new StringWriter(); + + GeneratorConfigFormatter.Register(config, tw); + + var html = tw.ToString(); + Assert.NotNull(html); + } + } +}