diff --git a/Nuget.config b/Nuget.config deleted file mode 100644 index f924a25..0000000 --- a/Nuget.config +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/notebooks/01_Introduction.dib b/notebooks/01_Introduction.dib new file mode 100644 index 0000000..75185a0 --- /dev/null +++ b/notebooks/01_Introduction.dib @@ -0,0 +1,129 @@ +#!markdown + +# Using DwC-A_dotnet.Interactive + +This notebook describes how to use DwC-A_dotnet and DwC-A_dotnet.Interactive to work with Darwin Core Archive files. + +Information on the dotnet libraries used here may be found at + +|Library|Link| +|---|---| +|DwC-A_dotnet|https://github.com/pjoiner/DwC-A_dotnet| +|DwC-A_dotnet.Interactive|https://github.com/pjoiner/DwC-A_dotnet.Interactive| + +Information on Darwin Core Archives may be found [here](https://dwc.tdwg.org/). + +#!markdown + +## Installation + +Use the #r magic command to install the libraries from NuGet. + +#!csharp + +#r "nuget:DwC-A_dotnet,0.6.0" +#r "nuget:DwC-A_dotnet.Interactive,0.1.8-Pre" + +#!markdown + +## Open An Archive +Use the `ArchiveReader` class to open the archive and provide the path to your archive. It is recommended that the archive be unzipped to a directory first to reduce the overhead of creating a temporary folder to unzip the archive. If you use the zip file remember to dispose of the temporary working directory at the end of your session by calling `archive.Dispose();` + +The test data we are using comes from the ["Insects from light trap (1992–2009), rooftop Zoological Museum, Copenhagen"](https://www.gbif.org/dataset/f506be53-9221-4b44-a41d-5aa0905ec216) dataset available for download from [gbif.org](https://www.gbif.org/). 
+ +#!csharp + +using DwC_A; +using System.IO.Compression; +using System.IO; + +var outputPath = "./data/dwca-rooftop-v1.4"; +if(Directory.Exists(outputPath)) + Directory.Delete(outputPath, true); +ZipFile.ExtractToDirectory("./data/dwca-rooftop-v1.4.zip", outputPath); +var archive = new ArchiveReader(@"./data/dwca-rooftop-v1.4"); + +#!markdown + +## Archive MetaData +The interactive extensions library (`DwC-A_dotnet.Interactive`) registers kernel extensions to display various archive metadata by using the `display()` command or simply entering the object you are interested in at the end of a cell without a semicolon on the end. For example, to view the metadata for an archive enter `.MetaData` as shown below. The same can be done for an `IFileReader` instance to get a list of the term metadata for a file. + +#!csharp + +archive.MetaData + +#!csharp + +archive.CoreFile + +#!csharp + +archive.Extensions.GetFileReaderByFileName("occurrence.txt") + +#!markdown + +## Displaying Data + +Data from a file can be displayed using the `DataRows` property of an `IFileReader`. For example, the first 50 rows of the Core event file from the sample archive can be displayed as follows. + +#!csharp + +archive.CoreFile.DataRows.Take(50) + +#!markdown + +## Accessing Individual Fields + +The DataRows property of a FileReader can be enumerated using a `foreach` loop or LINQ queries. The individual fields of each row can be accessed by using an index or the name of the term associated with the field or column. + +Use the Terms class of the `DwC_A.Terms` namespace as a shortcut to typing in the fully qualified name of the term. 
+ +#!csharp + +using DwC_A.Terms; + +foreach(var row in archive.CoreFile.DataRows.Take(1)) +{ + Console.Write($"type: {row[1]}\t"); //Use the index value to get the type column + Console.Write($"EventID: {row["http://rs.tdwg.org/dwc/terms/eventID"]}\t"); //Use the fully qualified name of the term + Console.WriteLine($"Event Date: {row[Terms.eventDate]}"); //Use the Terms class +} + +#!markdown + +## The Terms Command + +Use the `#!terms` magic command to list the available terms and a brief explanation of their use. + +#!csharp + +#!terms + +#!markdown + +## Query Data Using LINQ + +The following cell uses LINQ to gather a list of total individual counts of each genus for a specific sampling event. Change the number in the `.Skip(5)` line to see totals calculated for other events. + +#!csharp + +using DwC_A.Terms; + +//Retrieve the eventID from the event data file +var eventID = archive.CoreFile.DataRows + .Skip(5) //Change this number and run the cell again to see the data for a new eventID + .Take(1) + .First()[Terms.eventID]; + +//Get an IFileReader for the occurrence data file +var occurrences = archive.Extensions.GetFileReaderByFileName("occurrence.txt"); + +var data = occurrences.DataRows + .Where(n => n[Terms.eventID] == eventID) + .GroupBy(n => n[Terms.genus]) + .Select(g => new{ + Genus = g.Key, + Count = g.Sum(c => int.Parse(c[Terms.individualCount])) + }); + +data diff --git a/notebooks/01_Introduction.ipynb b/notebooks/01_Introduction.ipynb deleted file mode 100644 index ffbf110..0000000 --- a/notebooks/01_Introduction.ipynb +++ /dev/null @@ -1,239 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Using DwC-A_dotnet.Interactive\r\n", - "\r\n", - "This notebook describes how to use DwC-A_dotnet and DwC-A_dotnet.Interactive to work with Darwin Core Archive files.\r\n", - "\r\n", - "Information on the dotnet libraries used here may be found at \r\n", - "\r\n", - "|Library|Link|\r\n", - "|---|---|\r\n", - 
"|DwC-A_dotnet|https://github.com/pjoiner/DwC-A_dotnet|\r\n", - "|DwC-A_dotnet.Interactive|https://github.com/pjoiner/DwC-A_dotnet.Interactive|\r\n", - "\r\n", - "Information on Darwin Core Archives may be found [here](https://dwc.tdwg.org/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation\n", - "\n", - "Use the #r magic command to install the libraries from NuGet." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "#r \"nuget:DwC-A_dotnet,0.5.1\"\r\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.7-Pre\"\r\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Open An Archive\r\n", - "Use the `ArchiveReader` class to open the archive and provide the path to your archive. It is recommended that the archive be unzipped to a directory first to reduce the overhead of creating a temporary folder to unzip the archive. If you use the zip file remember to dispose of the temporary working directory at the end of your session by calling `archive.Dispose();`\r\n", - "\r\n", - "The test data we are using comes from the [\"Insects from light trap (1992–2009), rooftop Zoological Museum, Copenhagen\"](https://www.gbif.org/dataset/f506be53-9221-4b44-a41d-5aa0905ec216) dataset available for download from [gbif.org](https://www.gbif.org/)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using DwC_A;\r\n", - "using System.IO.Compression;\r\n", - "using System.IO;\r\n", - "\r\n", - "var outputPath = \"./data/dwca-rooftop-v1.4\";\r\n", - "if(Directory.Exists(outputPath)) \r\n", - " Directory.Delete(outputPath, true);\r\n", - "ZipFile.ExtractToDirectory(\"./data/dwca-rooftop-v1.4.zip\", outputPath);\r\n", - "var archive = new ArchiveReader(@\"./data/dwca-rooftop-v1.4\");" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Archive MetaData\r\n", - "The interactive extensions library (`DwC-A_dotnet.Interactive`) registers kernel extensions to display various archive metadata by using the `display()` command or simply entering the object you are interested in at the end of a cell without a semicolon on the end. For example, to view the metadata for an archive enter `.MetaData` as shown below. The same can be done for an `IFileReader` instance to get a list of the term metadata for a file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "archive.MetaData" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "archive.CoreFile" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "archive.Extensions.GetFileReaderByFileName(\"occurrence.txt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Displaying Data\r\n", - "\r\n", - "Data from a file can be displayed using the `DataRows` property of an `IFileReader`. 
For example, the first 10 rows of the Core event file from the sample archive can be displayed as follows." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "archive.CoreFile.DataRows.Take(50)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Accessing Individual Fields\r\n", - "\r\n", - "The DataRows property of a FileReader can be enumerated using a `foreach` loop or LinQ queries. The individual fields of each row can be accessed by using an index or the name of the term associated with the field or column.\r\n", - "\r\n", - "Use the Terms class of the `DwC_A.Terms` namespace as a shortcut to typing in the fully qualified name of the term." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using DwC_A.Terms;\r\n", - "\r\n", - "foreach(var row in archive.CoreFile.DataRows.Take(1))\r\n", - "{\r\n", - " Console.Write($\"type: {row[1]}\\t\"); //Use the index value to get the type column\r\n", - " Console.Write($\"EventID: {row[\"http://rs.tdwg.org/dwc/terms/eventID\"]}\\t\"); //USe the fully qualified name of the term\r\n", - " Console.WriteLine($\"Event Date: {row[Terms.eventDate]}\"); //Use the Terms class\r\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Terms Command\r\n", - "\r\n", - "Use the `#!terms` magic command to list the available terms and a brief explanation of their use." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#!terms" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Query Data Using LinQ\n", - "\n", - "The following cell uses LinQ to gather a list of total individual counts of each genus for a specific sampling event. Change the number in the `.Skip(1)` line to see totals calculated for other events. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using DwC_A.Terms;\r\n", - "\r\n", - "//Retrieve the eventID from the event data file\r\n", - "var eventID = archive.CoreFile.DataRows\r\n", - " .Skip(5) //Change this number and run the cell again and to see the data for a new eventID\r\n", - " .Take(1)\r\n", - " .First()[Terms.eventID];\r\n", - "\r\n", - "//Get an IFileReader for the occurrence data file\r\n", - "var occurrences = archive.Extensions.GetFileReaderByFileName(\"occurrence.txt\");\r\n", - "\r\n", - "var data = occurrences.DataRows\r\n", - " .Where(n => n[Terms.eventID] == eventID)\r\n", - " .GroupBy(n => n[Terms.genus])\r\n", - " .Select(g => new{\r\n", - " Genus = g.Key,\r\n", - " Count = g.Sum(c => int.Parse(c[Terms.individualCount])) \r\n", - " }); \r\n", - "\r\n", - "data" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".NET (C#)", - "language": "C#", - "name": ".net-csharp" - }, - "language_info": { - "file_extension": ".cs", - "mimetype": "text/x-csharp", - "name": "csharp", - "pygments_lexer": "csharp", - "version": "8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/notebooks/02_Formatting.dib b/notebooks/02_Formatting.dib new file mode 100644 index 0000000..8436650 --- /dev/null +++ b/notebooks/02_Formatting.dib @@ -0,0 +1,158 @@ +#!markdown + +# Formatting Output + +This notebook gives examples of various ways to format raw Darwin Core Archive data using [DwC-A_dotnet](https://github.com/pjoiner/DwC-A_dotnet) and and [DwC-A_dotnet.Interactive](https://github.com/pjoiner/DwC-A_dotnet.Interactive). + +First, we install the relevant libraries from Nuget. + +#!csharp + +#r "nuget:DwC-A_dotnet,0.6.0" +#r "nuget:DwC-A_dotnet.Interactive,0.1.8-Pre" + +#!markdown + +## Open The Archive + +We open the archive using an `ArchiveReader` object as described in the Introduction notebook. 
In this case we are using a dataset derived from a gbif query of butterflies of the family *Papilionidae*. This archive contains occurrence and multimedia data which also includes images. + +For more information on this dataset see [https://doi.org/10.15468/dl.jdftqs](https://doi.org/10.15468/dl.jdftqs). + +#!csharp + +using DwC_A; +using System.IO.Compression; +using System.IO; + +var outputPath = "./data/Papilionidae"; +if(Directory.Exists(outputPath)) + Directory.Delete(outputPath, true); +ZipFile.ExtractToDirectory("./data/Papilionidae.zip", outputPath); +var archive = new ArchiveReader(outputPath); +archive.MetaData + +#!csharp + +var occurrence = archive.CoreFile; +var multimedia = archive.Extensions.GetFileReaderByFileName("multimedia.txt"); +display(occurrence); +display(multimedia); + +#!markdown + +## Registering A Formatter + +A Formatter may be registered to display a row or single element as shown in the next cell. The easiest approach is to first define a record to hold the column or row information that is going to be displayed. + +The PocketView api can be used to build more complex html. For example the following cell creates a new record of type `ScientificName` and displays the genus and species in italics. + +#!csharp + +using Microsoft.DotNet.Interactive.Formatting; + +using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags; + +public record ScientificName(string genus, string species); + +Formatter.Register<ScientificName>((scientificName, writer) => { //The type argument is required; the lambda parameter types cannot be inferred without it + var output = div[style: "font-style: italic"]($"{scientificName.genus} {scientificName.species}"); + writer.Write(output); +}, "text/html" ); + +#!markdown + +## Scientific Name + +We can use the new ScientificName record type to format Occurrence data as follows. 
+ +#!csharp + +using DwC_A.Terms; + +occurrence.DataRows.Take(10) + .Select(n => new{ + date = n[Terms.eventDate], + scientificName = new ScientificName(n[Terms.genus], n[Terms.specificEpithet]) + }) + +#!markdown + +## Links + +We can register a link formatter as well to create clickable URLs. + +#!csharp + +public record Link(string url); + +Formatter.Register<Link>((link, writer) =>{ //Render every Link value as an anchor element + var linkHtml = a[href: link.url](link.url); + writer.Write(linkHtml); +}, "text/html"); + +occurrence.DataRows.Take(10) + .Select(n => new{ + date = n[Terms.eventDate], + scientificName = new ScientificName(n[Terms.genus], n[Terms.specificEpithet]), + link = new Link(n[Terms.references]) + }) + +#!markdown + +## Displaying Images + +The multimedia file of this archive contains links to images hosted on the web. We can display those images inline with our queries by registering an image formatter. + +Here we will use the PocketView api to add styles to restrict the height of the displayed image to 100px and center the optional caption text. The formatter is registered using a new `ImageUrl` record type. + +#!csharp + +public record ImageUrl(string url, string caption = ""); + +Formatter.Register<ImageUrl>((image, writer) =>{ //Render every ImageUrl value as a captioned figure + var fig = figure[style: "text-align: center; padding: 5px;"]( + img[src: image.url, style: "height: 100px; margin-left: auto; margin-right: auto;"], + div[style: "font-style: italic"](image.caption) + ); + writer.Write(fig); +}, "text/html"); + +#!markdown + +Finally, we use the `ImageUrl` record type in our query to display a list of images along with the scientific name. 
+ +#!csharp + +using DwC_A.Terms; + +var gbifId = "http://rs.gbif.org/terms/1.0/gbifID"; +var identifier = "http://purl.org/dc/terms/identifier"; + +var media = from o in occurrence.DataRows.Take(10) +join m in multimedia.DataRows on o[gbifId] equals m[gbifId] +select new{ + date = o[Terms.eventDate], + link = new Link(o[Terms.references]), + image = new ImageUrl(m[identifier], o[Terms.scientificName]) +}; + +media + +#!markdown + +## Creating a Gallery + +We can put this all together by registering a formatter for `IEnumerable<ImageUrl>` as follows and create a gallery of images. + +#!csharp + +Formatter.Register<IEnumerable<ImageUrl>>((images, writer) =>{ //Applies to sequences of ImageUrl, such as media.Select(n => n.image) below + var figs = new List<PocketView>(); //One left-floated div per image + images.ToList().ForEach(image => { + figs.Add(div[style: "float: left"](image)); + }); + writer.Write(div(figs)); +}, "text/html"); + +media.Select(n => n.image) diff --git a/notebooks/02_Formatting.ipynb b/notebooks/02_Formatting.ipynb deleted file mode 100644 index 8e80a1f..0000000 --- a/notebooks/02_Formatting.ipynb +++ /dev/null @@ -1,284 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Formatting Output\n", - "\n", - "This notebook gives examples of various ways to format raw Darwin Core Archive data using [DwC-A_dotnet](https://github.com/pjoiner/DwC-A_dotnet) and and [DwC-A_dotnet.Interactive](https://github.com/pjoiner/DwC-A_dotnet.Interactive).\n", - "\n", - "First, we install the relevant libraries from Nuget." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "#r \"nuget:DwC-A_dotnet,0.5.1\"\r\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.7-Pre\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Open The Archive\n", - "\n", - "We open the archive using an `ArchiveReader` object as described in the Introduction notebook. 
In this case we are using a dataset derived from a gbif query of butterflies of the family *Papilionidae*. This archive contains occurrence and multimedia data which also includes images.\n", - "\n", - "For more information on this dataset see [https://doi.org/10.15468/dl.jdftqs](https://doi.org/10.15468/dl.jdftqs)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using DwC_A;\n", - "using System.IO.Compression;\n", - "using System.IO;\n", - "\n", - "var outputPath = \"./data/Papilionidae\";\n", - "if(Directory.Exists(outputPath))\n", - " Directory.Delete(outputPath, true);\n", - "ZipFile.ExtractToDirectory(\"./data/Papilionidae.zip\", outputPath);\n", - "var archive = new ArchiveReader(outputPath);\n", - "archive.MetaData" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "var occurrence = archive.CoreFile;\n", - "var multimedia = archive.Extensions.GetFileReaderByFileName(\"multimedia.txt\");\n", - "display(occurrence);\n", - "display(multimedia);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Registering A Formatter\n", - "\n", - "A Formatter may be registered to display a row or single element as shown in the next cell. The easiest approach is to first define a record to hold the column or row information that is going to be displayed.\n", - "\n", - "The the PocketView api can be used to build more complex html. For example the following cell creates a new record of type `ScientificName` and displays the genus and species in italics." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using Microsoft.DotNet.Interactive.Formatting;\n", - "\n", - "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\n", - "\n", - "public record ScientificName(string genus, string species);\n", - "\n", - "Formatter.Register((scientificName, writer) => {\n", - " var output = div[style: \"font-style: italic\"]($\"{scientificName.genus} {scientificName.species}\");\n", - " writer.Write(output);\n", - "}, \"text/html\" );" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Scientific Name\n", - "\n", - "We can use the new ScientificName record type to format Occurrence data as follows." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using DwC_A.Terms;\n", - "\n", - "occurrence.DataRows.Take(10)\n", - " .Select(n => new{\n", - " date = n[Terms.eventDate],\n", - " scientificName = new ScientificName(n[Terms.genus], n[Terms.specificEpithet])\n", - " })" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Links\n", - "\n", - "We can register a link formatter as well to provide create clickable urls." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "public record Link(string url);\n", - "\n", - "Formatter.Register((link, writer) =>{\n", - " var linkHtml = a[href: link.url](link.url);\n", - " writer.Write(linkHtml);\n", - "}, \"text/html\");\n", - "\n", - "occurrence.DataRows.Take(10)\n", - " .Select(n => new{\n", - " date = n[Terms.eventDate],\n", - " scientificName = new ScientificName(n[Terms.genus], n[Terms.specificEpithet]),\n", - " link = new Link(n[Terms.references])\n", - " })" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Displaying Images\n", - "\n", - "The multimedia file of this archive contains links to images hosted on the web. We can display those images inline with our queries by registering an image formatter.\n", - "\n", - "Here we will use the PocketView api to add styles to restrict the height of the displayed image to 100px and center the optional caption text. The formatter is registered using a new `ImageUrl` record type." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "public record ImageUrl(string url, string caption = \"\");\n", - "\n", - "Formatter.Register((image, writer) =>{\n", - " var fig = figure[style: \"text-align: center; padding: 5px;\"](\n", - " img[src: image.url, style: \"height: 100px; margin-left: auto; margin-right: auto;\"],\n", - " div[style: \"font-style: italic\"](image.caption)\n", - " );\n", - " writer.Write(fig);\n", - "}, \"text/html\");" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we use the `ImageUrl` record type in our query to display a list of images along with the scientific name." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using DwC_A.Terms;\n", - "\n", - "var gbifId = \"http://rs.gbif.org/terms/1.0/gbifID\";\n", - "var identifier = \"http://purl.org/dc/terms/identifier\";\n", - "\n", - "var media = from o in occurrence.DataRows.Take(10)\n", - "join m in multimedia.DataRows on o[gbifId] equals m[gbifId]\n", - "select new{\n", - " date = o[Terms.eventDate],\n", - " link = new Link(o[Terms.references]),\n", - " image = new ImageUrl(m[identifier], o[Terms.scientificName])\n", - "}; \n", - "\n", - "media" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating a Gallery\n", - "\n", - "We can put this all together by registering a formatter for `IEnumerable` as follows and create a gallery of images." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "Formatter.Register>((images, writer) =>{\n", - " var figs = new List();\n", - " images.ToList().ForEach(image => {\n", - " figs.Add(div[style: \"float: left\"](image));\n", - " });\n", - " writer.Write(div(figs));\n", - "}, \"text/html\");\n", - "\n", - "media.Select(n => n.image)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".NET (C#)", - "language": "C#", - "name": ".net-csharp" - }, - "language_info": { - "file_extension": ".cs", - "mimetype": "text/x-csharp", - "name": "csharp", - "pygments_lexer": "csharp", - "version": "8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/notebooks/03_DataFrame.dib b/notebooks/03_DataFrame.dib new file mode 100644 index 0000000..92cc46f --- /dev/null +++ b/notebooks/03_DataFrame.dib @@ -0,0 +1,143 @@ +#!markdown + +# Using DataFrame and Plotly + +This notebook demonstrates using 
[DwC-A_dotnet](https://github.com/pjoiner/DwC-A_dotnet) with the [Microsoft.Data.Analysis](https://github.com/dotnet/machinelearning) framework and [XPlot Plotly](https://www.nuget.org/packages/XPlot.Plotly/). + +First we'll load the required libraries. + +#!csharp + +#r "nuget: XPlot.Plotly, 4.0.6" +#r "nuget: XPlot.Plotly.Interactive, 4.0.6" +#r "nuget: Microsoft.Data.Analysis, 0.19.0" +#r "nuget: DwC-A_dotnet, 0.6.0" +#r "nuget: DwC-a_dotnet.Interactive, 0.1.8-Pre" + +#!markdown + +## Formatting DataFrame for Display + +The following cell is used to register a DataFrame formatter for subsequent operations so we can display our DataFrame objects. + +#!csharp + +using Microsoft.AspNetCore.Html; +using Microsoft.DotNet.Interactive.Formatting; +using Microsoft.Data.Analysis; + +using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags; + +Formatter.Register<DataFrame>((df, writer) => +{ + var headers = new List<IHtmlContent>(); + headers.Add(th(i("index"))); + headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name))); + var rows = new List<List<IHtmlContent>>(); + var take = 20; //Only the first 20 rows are rendered + for (var i = 0; i < Math.Min(take, df.Rows.Count); i++) + { + var cells = new List<IHtmlContent>(); + cells.Add(td(i)); + foreach (var obj in df.Rows[i]) + { + cells.Add(td(obj)); + } + rows.Add(cells); + } + + var t = table( + thead( + headers), + tbody( + rows.Select( + r => tr(r)))); + + writer.Write(t); +}, "text/html"); + +#!markdown + +## Loading Archive + +We are going to use our [rooftop data set](https://www.gbif.org/dataset/f506be53-9221-4b44-a41d-5aa0905ec216) and query for the eventID of a sampling event from the CoreFile of the archive. 
+ +#!csharp + +using DwC_A; +using DwC_A.Terms; +using DwC_A.Config; +using DwC_A.Factories; +using System.IO; + +var archiveFile = "./data/dwca-rooftop-v1.4.zip"; +var factory = new DefaultFactory(cfg => { + cfg.Add<ArchiveFolderConfiguration>(folder => { + folder.OutputPath = "./dwca-rooftop-v1.4"; + if(Directory.Exists(folder.OutputPath)){ //Start from a clean output folder + Directory.Delete(folder.OutputPath, true); + } + }); +}); +var archive = new ArchiveReader(archiveFile, factory); +var eventFile = archive.CoreFile; +var occurrence = archive.Extensions.GetFileReaderByFileName("occurrence.txt"); + +var eventID = eventFile.DataRows.Skip(2).Select(row => row[Terms.eventID]).First(); +display(eventID); + +#!markdown + +## Convert IRow Collection to DataFrame + +Once we have an eventID we can query the occurrence data for that event using LINQ and load the results directly into some DataFrame columns. We are just creating columns for the species name and count of species from the occurrence data. + +#!csharp + +using Microsoft.Data.Analysis; + +var species = new StringDataFrameColumn("species", 0); +var counts = new PrimitiveDataFrameColumn<int>("count"); + +occurrence.DataRows + .Where(row => row[Terms.eventID] == eventID) + .OrderBy(row => row[Terms.scientificName]) + .ToList() + .ForEach(row => { + species.Append(row[Terms.scientificName]); + counts.Append(int.Parse(row[Terms.individualCount])); + }); + +var df = new DataFrame(species, counts); +df.Info() + +#!markdown + +## Using DataFrame + +Now we can perform calculations on the data in our DataFrame object. For example, we can calculate the [Simpson Diversity Index](https://en.wikipedia.org/wiki/Diversity_index) for this sampling event using the following formula. + +$D = 1 - \frac {\sum_{}n(n-1)} {N(N-1)}$ + +#!csharp + +var sum = (int)counts.Sum(); +double D = 1 - (double)(int)(counts.Apply(n => n * (n - 1)).Sum()) / ((double)sum * (sum - 1)); //Denominator is computed as double so N(N-1) cannot overflow int +display($"D = {D:0.00}") + +#!markdown + +## Plotly + +We can also use XPlot Plotly to display a column chart of our dataset. 
+ +#!csharp + +using XPlot.Plotly; + +var kvp = df.Rows + .AsEnumerable() + .Select(row => Tuple.Create(row[0].ToString(), (int)row[1])); +var chart = Chart.Column(kvp); +chart.WithTitle($"Event: {eventID}"); +display(chart) diff --git a/notebooks/03_DataFrame.ipynb b/notebooks/03_DataFrame.ipynb deleted file mode 100644 index bd244da..0000000 --- a/notebooks/03_DataFrame.ipynb +++ /dev/null @@ -1,236 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Using DataFrame and Plotly\n", - "\n", - "This notebook demonstrates using [DwC-A_dotnet](https://github.com/pjoiner/DwC-A_dotnet) with the [Microsoft.Data.Analysis](https://github.com/dotnet/machinelearning) framework and [XPlot Plotly](https://www.nuget.org/packages/XPlot.Plotly/). \n", - "\n", - "First we'll load the required libraries." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "#r \"nuget: XPlot.Plotly, 4.0.1\"\r\n", - "#r \"nuget: XPlot.Plotly.Interactive, 4.0.1\"\r\n", - "#r \"nuget: Microsoft.Data.Analysis, 0.4.0\"\r\n", - "#r \"nuget: DwC-A_dotnet, 0.5.1\"\r\n", - "#r \"nuget: DwC-a_dotnet.Interactive, 0.1.7-Pre\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Formatting DataFrame for Display\n", - "\n", - "The following cell is used to register a DataFrame formatter for subsequent opterations so we can display our DateFrame objects." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using Microsoft.AspNetCore.Html;\n", - "using Microsoft.DotNet.Interactive.Formatting;\n", - "using Microsoft.Data.Analysis;\n", - "\n", - "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\n", - "\n", - "Formatter.Register((df, writer) =>\n", - "{\n", - " var headers = new List();\n", - " headers.Add(th(i(\"index\")));\n", - " headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));\n", - " var rows = new List>();\n", - " var take = 20;\n", - " for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)\n", - " {\n", - " var cells = new List();\n", - " cells.Add(td(i));\n", - " foreach (var obj in df.Rows[i])\n", - " {\n", - " cells.Add(td(obj));\n", - " }\n", - " rows.Add(cells);\n", - " }\n", - "\n", - " var t = table(\n", - " thead(\n", - " headers),\n", - " tbody(\n", - " rows.Select(\n", - " r => tr(r))));\n", - "\n", - " writer.Write(t);\n", - "}, \"text/html\");" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading Archive\n", - "\n", - "We are going to use our [rooftop data set](https://www.gbif.org/dataset/f506be53-9221-4b44-a41d-5aa0905ec216) and query for the eventID of a sampling event from the CoreFile of the archive." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using DwC_A;\n", - "using DwC_A.Terms;\n", - "using DwC_A.Config;\n", - "using DwC_A.Factories;\n", - "using System.IO;\n", - "\n", - "var archiveFile = \"./data/dwca-rooftop-v1.4.zip\";\n", - "var factory = new DefaultFactory(cfg => {\n", - " cfg.Add(cfg => {\n", - " cfg.OutputPath = \"./dwca-rooftop-v1.4\";\n", - " if(Directory.Exists(cfg.OutputPath)){\n", - " Directory.Delete(cfg.OutputPath, true);\n", - " }\n", - " });\n", - "});\n", - "var archive = new ArchiveReader(archiveFile, factory);\n", - "var eventFile = archive.CoreFile;\n", - "var occurrence = archive.Extensions.GetFileReaderByFileName(\"occurrence.txt\");\n", - "\n", - "var eventID = eventFile.DataRows.Skip(2).Select(row => row[Terms.eventID]).First();\n", - "display(eventID);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Convert IRow Collection to DataFrame\n", - "\n", - "Once we have an eventID we can query the occurrence data for that event using LinQ and load the results directly into some DataFrame columns. We are just creating columns for the species name and count of species from the occurrence data." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using Microsoft.Data.Analysis;\n", - "\n", - "var species = new StringDataFrameColumn(\"species\", 0);\n", - "var counts = new PrimitiveDataFrameColumn(\"count\");\n", - "\n", - "occurrence.DataRows\n", - " .Where(row => row[Terms.eventID] == eventID)\n", - " .OrderBy(row => row[Terms.scientificName])\n", - " .ToList()\n", - " .ForEach(row => {\n", - " species.Append(row[Terms.scientificName]);\n", - " counts.Append(int.Parse(row[Terms.individualCount]));\n", - " });\n", - "\n", - "var df = new DataFrame(species, counts);\n", - "df.Info()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using DataFrame\n", - "\n", - "Now we can perform calculations on the data in our DataFrame object. For example, we can calculate the [Simpson Diversity Index](https://en.wikipedia.org/wiki/Diversity_index) for this sampling event using the following formula.\n", - "\n", - "$D = 1 - \\frac {\\sum_{}n(n-1)} {N(N-1)}$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "var sum = (int)counts.Sum();\n", - "double D = 1 - (double)(int)(counts.Apply(n => n * (n - 1)).Sum()) / (sum * (sum - 1));\n", - "display($\"D = {D:0.00}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Plotly\n", - "\n", - "We can also use XPlot Plotly to display a column chart of our dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - } - }, - "outputs": [], - "source": [ - "using XPlot.Plotly;\n", - "\n", - "var kvp = df.Rows\n", - " .AsEnumerable()\n", - " .Select(row => Tuple.Create(row[0].ToString(), (int)row[1]));\n", - "var chart = Chart.Column(kvp);\n", - "chart.WithTitle($\"Event: {eventID}\");\n", - "display(chart)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".NET (C#)", - "language": "C#", - "name": ".net-csharp" - }, - "language_info": { - "file_extension": ".cs", - "mimetype": "text/x-csharp", - "name": "csharp", - "pygments_lexer": "csharp", - "version": "8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/notebooks/04_FSharp.dib b/notebooks/04_FSharp.dib new file mode 100644 index 0000000..56bf621 --- /dev/null +++ b/notebooks/04_FSharp.dib @@ -0,0 +1,71 @@ +#!markdown + +# Use DwC-A_dotnet with F# + +DwC-A_dotnet can be used with F# as well as C#. The NuGet library installation and ArchiveReader/FileReader formatters work in the same way as they do for C#. + +Here we'll use the Papilionidae dataset to demonstrate reading latitude and longitude information from the occurrence data file and plot it on a map of Texas with Plotly. + +**Important Note:** If you are using this notebook from Binder make sure to select **Kernel -> Change Kernel -> .NET (F#)** before running any of the cells below.
+ +#!fsharp + +#r "nuget:Plotly.NET,2.0.0-preview.15" +#r "nuget:Plotly.NET.Interactive,2.0.0-preview.15" +#r "nuget:FSharp.Data,4.2.5" +#r "nuget:DwC-A_dotnet,0.6.0" +#r "nuget:DwC-A_dotnet.Interactive,0.1.8-Pre" + +#!fsharp + +open DwC_A +open DwC_A.Terms +open DwC_A.Factories +open DwC_A.Config +open System.IO + +let archiveFile = "./data/Papilionidae.zip" +let factory = new DefaultFactory(fun cfg -> + cfg.Add( fun cfg -> + cfg.OutputPath <- "./Papilionidae" + if(Directory.Exists(cfg.OutputPath)) then + Directory.Delete(cfg.OutputPath, true) )) +let archive = new ArchiveReader(archiveFile, factory); + +let occurrence = archive.CoreFile; +occurrence + +#!fsharp + +open System.Linq +open Plotly.NET + +let lonlat = occurrence.DataRows + .Where(fun row -> row.[Terms.decimalLongitude] <> null && row.[Terms.decimalLatitude] <> null) + .Select(fun row -> ( $"{row.[Terms.genus]} {row.[Terms.specificEpithet]}", + row.[Terms.decimalLongitude] |> double, + row.[Terms.decimalLatitude] |> double) ) + .GroupBy(fun row -> match row with (a, b, c) -> a) + .Select(fun group -> (group.Key, group.Select(fun row -> match row with (a, b, c) -> (b, c)))) + +let geo = lonlat.Select(fun row -> + match row with (a, b) -> + Chart.ScatterGeo(b, + mode=StyleParam.Mode.Markers, + ShowLegend = true) + |> Chart.withMarkerStyle(Size = 2) + |> Chart.withTraceName(a)) + |> Chart.combine + +let map = geo |> Chart.withGeoStyle( + FitBounds = StyleParam.GeoFitBounds.GeoJson, + Scope = StyleParam.GeoScope.Usa, + ShowLakes = true, + ShowRivers = true, + ShowLand = true, + LandColor = Color.fromHex("#f1f1f1") + ) + |> Chart.withSize(height = 500.0, width = 800.0) + |> Chart.withTitle(title = "Papilionidae of Texas") + +map diff --git a/notebooks/04_FSharp.ipynb b/notebooks/04_FSharp.ipynb deleted file mode 100644 index 25be149..0000000 --- a/notebooks/04_FSharp.ipynb +++ /dev/null @@ -1,116 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Use DwC-A_dotnet 
with F#\r\n", - "\r\n", - "DwC-A_dotnet can be used with F# as well as C#. The NuGet library installation and ArchiveReader/FileReader formatters work in the same way as they do for C#.\r\n", - "\r\n", - "Here we'll use the Papilionidae dataset to demonstrate reading latitude and longitude information from the occurrence data file and plot it on a map of Texas with Plotly.\r\n", - "\r\n", - "**Import Note:** If you are using this notebook from Binder make sure to select **Kernel -> Change Kernel -> .NET (F#)** before running any of the cells below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#r \"nuget:Plotly.NET,2.0.0-beta8\"\r\n", - "#r \"nuget:Plotly.NET.Interactive,2.0.0-beta8\"\r\n", - "#r \"nuget:FSharp.Data\"\r\n", - "#r \"nuget:DwC-A_dotnet,0.5.1\"\r\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.7-Pre\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "open DwC_A\r\n", - "open DwC_A.Terms\r\n", - "open DwC_A.Factories\r\n", - "open DwC_A.Config\r\n", - "open System.IO\r\n", - "\r\n", - "let archiveFile = \"./data/Papilionidae.zip\"\r\n", - "let factory = new DefaultFactory(fun cfg -> \r\n", - " cfg.Add( fun cfg -> \r\n", - " cfg.OutputPath <- \"./Papilionidae\"\r\n", - " if(Directory.Exists(cfg.OutputPath)) then\r\n", - " Directory.Delete(cfg.OutputPath, true) ))\r\n", - "let archive = new ArchiveReader(archiveFile, factory);\r\n", - "\r\n", - "let occurrence = archive.CoreFile;\r\n", - "occurrence" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "open System.Linq\r\n", - "open Plotly.NET\r\n", - "\r\n", - "let lonlat = occurrence.DataRows\r\n", - " .Where(fun row -> row.[Terms.decimalLongitude] <> null && row.[Terms.decimalLatitude] <> null)\r\n", - " .Select(fun row -> ( $\"{row.[Terms.genus]} {row.[Terms.specificEpithet]}\", \r\n", - " 
row.[Terms.decimalLongitude] |> double, \r\n", - " row.[Terms.decimalLatitude] |> double) )\r\n", - " .GroupBy(fun row -> match row with (a, b, c) -> a)\r\n", - " .Select(fun group -> (group.Key, group.Select(fun row -> match row with (a, b, c) -> (b, c))))\r\n", - "\r\n", - "let geo = lonlat.Select(fun row -> \r\n", - " match row with (a, b) -> \r\n", - " Chart.ScatterGeo(b, \r\n", - " mode=StyleParam.Mode.Markers,\r\n", - " Showlegend = true)\r\n", - " |> Chart.withMarkerStyle(Size = 2) \r\n", - " |> Chart.withTraceName(a))\r\n", - " |> Chart.Combine\r\n", - "\r\n", - "let map = geo |> Chart.withMapStyle(\r\n", - " FitBounds = StyleParam.GeoFitBounds.GeoJson,\r\n", - " Scope = StyleParam.GeoScope.Usa,\r\n", - " ShowLakes = true,\r\n", - " ShowRivers = true,\r\n", - " ShowLand = true,\r\n", - " LandColor = \"#f1f1f1\"\r\n", - " )\r\n", - " |> Chart.withSize(height = 500.0, width = 800.0)\r\n", - " |> Chart.withTitle(title = \"Papilionidae of Texas\")\r\n", - "\r\n", - "map " - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".NET (C#)", - "language": "C#", - "name": ".net-csharp" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "orig_nbformat": 2 - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file diff --git a/notebooks/05_Eml.dib b/notebooks/05_Eml.dib new file mode 100644 index 0000000..08f055c --- /dev/null +++ b/notebooks/05_Eml.dib @@ -0,0 +1,100 @@ +#!markdown + +# Ecological Metadata Language (EML) + +Some archives include additional metadata stored in the form of an XML file that uses [Ecological Metadata Language (EML)](https://eml.ecoinformatics.org/). The path to this file can be retrieved from the `ArchiveReader.MetaData`.
This notebook describes how information can be retrieved from the dataset metadata and presented. + +#!csharp + +#r "nuget:DwC-A_dotnet,0.6.0" +#r "nuget:DwC-A_dotnet.Interactive,0.1.8-Pre" + +#!csharp + +using DwC_A; +using System.IO; + +var archive = new ArchiveReader("./data/dwca-rooftop-v1.4.zip"); +var emlPath = Path.Combine(archive.OutputPath, archive.MetaData.Metadata); +emlPath + +#!markdown + +## Using XPath Queries + +Using an XPathNavigator object we can query into any part of the document and retrieve single elements or sections. + +For some tips on finding the information you need see the document [Best Practices for Dataset Metadata in Ecological Metadata Language (EML)](https://environmentaldatainitiative.org/five-phases-of-data-publishing/phase-3/metadata-best-practices/) available for download from [https://environmentaldatainitiative.org](https://environmentaldatainitiative.org). + +#!csharp + +using System.Xml; +using System.Xml.XPath; + +var docNav = new XPathDocument(emlPath); +var nav = docNav.CreateNavigator(); +XmlNamespaceManager nsmgr = new XmlNamespaceManager(nav.NameTable); +nsmgr.AddNamespace("eml", "eml://ecoinformatics.org/eml-2.1.1"); +var root = nav.SelectSingleNode("eml:eml", nsmgr); + +var title = root.SelectSingleNode("dataset/title", nsmgr).ToString(); +title + +#!markdown + +## Formatter Shortcut + +The following Formatters can be used as a shortcut to display the results of simple queries. Try some of the following in the cell following the next. 
+ +* dataset/title +* dataset/abstract/para +* dataset/keywordSet/keyword +* additionalMetadata/metadata/gbif/citation + +#!csharp + +using Microsoft.DotNet.Interactive.Formatting; +using System.Xml.XPath; + +using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags; + +Formatter.Register((nav, writer) => { + var output = p(nav.ToString()); + writer.Write(output); +}, "text/html"); + +Formatter.Register((iter, writer) => { + var outputList = new List(); + foreach(var node in iter) + { + outputList.Add(p(node.ToString())); + } + writer.Write(div(outputList)); +}, "text/html"); + +#!csharp + +root.Select("dataset/title", nsmgr) + +#!markdown + +## Using XSL Stylesheets + +We can also perform XSL Transforms on the data to produce an HTML document using the XslCompiledTransform class as follows. Here we are using the stylesheet eml.xslt to retrieve the document title, abstract and citation information. + +#!csharp + +using System.Xml.Xsl; +using System.Xml; +using Microsoft.AspNetCore.Html; + +using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags; + +using(var stringWriter = new StringWriter()) +using(var writer = new XmlTextWriter(stringWriter)) +{ + XslCompiledTransform xslt = new XslCompiledTransform(); + xslt.Load("./data/eml.xslt"); + xslt.Transform(emlPath, writer); + display(new HtmlString(stringWriter.ToString())); +} diff --git a/notebooks/05_Eml.ipynb b/notebooks/05_Eml.ipynb deleted file mode 100644 index 6c90608..0000000 --- a/notebooks/05_Eml.ipynb +++ /dev/null @@ -1,166 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Ecological Metadata Language (EML)\r\n", - "\r\n", - "Some archives include additional metadata stored in the form of an XML file that uses [Ecological Metadata Language (EML)](https://eml.ecoinformatics.org/). This path to this file can be retrieved from the `ArchiveReader.MetaData`.
This notebook describes how information can be retrieved from the dataset metadata and presented." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#r \"nuget:DwC-A_dotnet,0.5.1\"\r\n", - "#r \"nuget:DwC-A_dotnet.Interactive,0.1.7-Pre\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using DwC_A;\r\n", - "using System.IO;\r\n", - "\r\n", - "var archive = new ArchiveReader(\"./data/dwca-rooftop-v1.4.zip\");\r\n", - "var emlPath = Path.Combine(archive.OutputPath, archive.MetaData.Metadata);\r\n", - "emlPath" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using XPath Queries\r\n", - "\r\n", - "Using an XPathNavigator object we can query into any part of the document and retrieve single elements or sections.\r\n", - "\r\n", - "For some tips on finding the information you need see the document [Best Practices for Dataset Metadata in Ecological Metadata Language (EML)](https://environmentaldatainitiative.org/five-phases-of-data-publishing/phase-3/metadata-best-practices/) available for download from [https://environmentaldatainitiative.org](https://environmentaldatainitiative.org)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using System.Xml;\r\n", - "using System.Xml.XPath;\r\n", - "\r\n", - "var docNav = new XPathDocument(emlPath);\r\n", - "var nav = docNav.CreateNavigator();\r\n", - "XmlNamespaceManager nsmgr = new XmlNamespaceManager(nav.NameTable);\r\n", - "nsmgr.AddNamespace(\"eml\", \"eml://ecoinformatics.org/eml-2.1.1\");\r\n", - "var root = nav.SelectSingleNode(\"eml:eml\", nsmgr);\r\n", - "\r\n", - "var title = root.SelectSingleNode(\"dataset/title\", nsmgr).ToString();\r\n", - "title" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Formatter Shortcut\r\n", - "\r\n", - "The following Formatters can be used as a shortcut to display the results of simple queries. Try some of the following in the cell following the next.\r\n", - "\r\n", - "* dataset/title\r\n", - "* dataset/abstract/para\r\n", - "* dataset/keywordSet/keyword\r\n", - "* additionalMetadata/metadata/gbif/citation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using Microsoft.DotNet.Interactive.Formatting;\r\n", - "using System.Xml.XPath;\r\n", - "\r\n", - "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\r\n", - "\r\n", - "Formatter.Register((nav, writer) => {\r\n", - " var output = p(nav.ToString());\r\n", - " writer.Write(output);\r\n", - "}, \"text/html\");\r\n", - "\r\n", - "Formatter.Register((iter, writer) => {\r\n", - " var outputList = new List();\r\n", - " foreach(var node in iter)\r\n", - " {\r\n", - " outputList.Add(p(node.ToString()));\r\n", - " }\r\n", - " writer.Write(div(outputList));\r\n", - "}, \"text/html\");\r\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "root.Select(\"dataset/title\", nsmgr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using XSL 
Stylesheets\r\n", - "\r\n", - "We can also perform XSL Transforms on the data to produce an HTML document using the XslCompiledTransform class as follows. Here we are using a the stylesheet eml.xslt to retrieve the document title, abstract and citation information." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "using System.Xml.Xsl;\r\n", - "using System.Xml;\r\n", - "using Microsoft.AspNetCore.Html;\r\n", - "\r\n", - "using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;\r\n", - "\r\n", - "using(var stringWriter = new StringWriter())\r\n", - "using(var writer = new XmlTextWriter(stringWriter))\r\n", - "{\r\n", - " XslCompiledTransform xslt = new XslCompiledTransform();\r\n", - " xslt.Load(\"./data/eml.xslt\");\r\n", - " xslt.Transform(emlPath, writer);\r\n", - " display(new HtmlString(stringWriter.ToString()));\r\n", - "}" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".NET (C#)", - "language": "C#", - "metadata": { - "interpreter": { - "hash": "06a136a4bec6dd37550f67c12f0bc3840a5b46fcffdfeab5f0089641cb553d70" - } - }, - "name": ".net-csharp" - }, - "language_info": { - "name": "dotnet-interactive.csharp", - "version": "3.8.8-final" - }, - "orig_nbformat": 2 - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file diff --git a/notebooks/06_DataMapping.dib b/notebooks/06_DataMapping.dib new file mode 100644 index 0000000..95ff3c3 --- /dev/null +++ b/notebooks/06_DataMapping.dib @@ -0,0 +1,73 @@ +#!markdown + +# Data Conversion and Mapping + +File data fields can be converted to strongly typed objects using one of the Convert extension methods of the IRow interface. Documentation can be found [here](https://github.com/pjoiner/DwC-A_dotnet/wiki/Type-Conversion). + +We'll utilize the [DwC-A_dotnet.Mapping](https://github.com/pjoiner/DwC-A_dotnet.Mapping) extension library to map IRow fields to a model class. 
+ +#!csharp + +#r "nuget:DwC-A_dotnet,0.6.0" +#r "nuget:DwC-A_dotnet.Mapping,0.6.1" +#r "nuget:DwC-A_dotnet.Interactive,0.1.8-Pre" + +#!markdown + +## Model Class + +First create the class that you wish to map values to. In this instance we are going to create an Occurrence class to display the scientific name and coordinates of the observation. + +#!csharp + +using DwC_A; + +var outputPath = "./data/Papilionidae.zip"; +var archive = new ArchiveReader(outputPath); + +archive + +#!markdown + +## Class Definition + +The next cell loads a class definition to map the data into. This code can be created by hand or generated using the dotnet tool [dwca-codegen](https://www.nuget.org/packages/dwca-codegen/) which is available on NuGet. For more information on using [dwca-codegen](https://github.com/pjoiner/DwC-A_dotnet.Mapping/tree/master/src/dwca-codegen) see the [README.md](https://github.com/pjoiner/DwC-A_dotnet.Mapping/blob/master/src/dwca-codegen/README.md) file for that package / repo. + +**Note:** Make sure you set the namespace to "" when generating class files from an archive for use in dotnet interactive since namespaces aren't allowed. + +#!csharp + +#load "./Code/Occurrence.cs" + +#!markdown + +## Mapper + +Next we create a mapper and define a method for mapping the fields of the IRow to the class. + +#!csharp + +using DwC_A.Mapping; +using DwC_A.Terms; +using DwC_A.Extensions; + +var mapper = MapperFactory.CreateMapper((o, row) => { + o.GbifID = row["http://rs.gbif.org/terms/1.0/gbifID"]; + o.Identifier = row["http://purl.org/dc/terms/identifier"]; + o.ScientificName = row[Terms.scientificName]; + o.DecimalLongitude = row.Convert(Terms.decimalLongitude); + o.DecimalLatitude = row.Convert(Terms.decimalLatitude); +}); + +#!markdown + +## Map + +Finally, we map the data using the Map methods of either the IRow or IFileReader interfaces.
+ +#!csharp + +archive.CoreFile + .Map(mapper) + .Take(100) + .Select(n => new{n.GbifID, n.Identifier, n.ScientificName, n.DecimalLatitude, n.DecimalLongitude}) diff --git a/notebooks/Code/Multimedia.cs b/notebooks/Code/Multimedia.cs new file mode 100644 index 0000000..ad172e1 --- /dev/null +++ b/notebooks/Code/Multimedia.cs @@ -0,0 +1,18 @@ +public partial class Multimedia +{ + public string GbifID { get; set; } + public string Type { get; set; } + public string Format { get; set; } + public string Identifier { get; set; } + public string References { get; set; } + public string Title { get; set; } + public string Description { get; set; } + public string Source { get; set; } + public string Audience { get; set; } + public string Created { get; set; } + public string Creator { get; set; } + public string Contributor { get; set; } + public string Publisher { get; set; } + public string License { get; set; } + public string RightsHolder { get; set; } +} \ No newline at end of file diff --git a/notebooks/Code/Occurrence.cs b/notebooks/Code/Occurrence.cs new file mode 100644 index 0000000..fcc2dea --- /dev/null +++ b/notebooks/Code/Occurrence.cs @@ -0,0 +1,253 @@ +public partial class Occurrence +{ + public string GbifID { get; set; } + public string Abstract { get; set; } + public string AccessRights { get; set; } + public string AccrualMethod { get; set; } + public string AccrualPeriodicity { get; set; } + public string AccrualPolicy { get; set; } + public string Alternative { get; set; } + public string Audience { get; set; } + public string Available { get; set; } + public string BibliographicCitation { get; set; } + public string ConformsTo { get; set; } + public string Contributor { get; set; } + public string Coverage { get; set; } + public string Created { get; set; } + public string Creator { get; set; } + public string Date { get; set; } + public string DateAccepted { get; set; } + public string DateCopyrighted { get; set; } + public string DateSubmitted { 
get; set; } + public string Description { get; set; } + public string EducationLevel { get; set; } + public string Extent { get; set; } + public string Format { get; set; } + public string HasFormat { get; set; } + public string HasPart { get; set; } + public string HasVersion { get; set; } + public string Identifier { get; set; } + public string InstructionalMethod { get; set; } + public string IsFormatOf { get; set; } + public string IsPartOf { get; set; } + public string IsReferencedBy { get; set; } + public string IsReplacedBy { get; set; } + public string IsRequiredBy { get; set; } + public string IsVersionOf { get; set; } + public string Issued { get; set; } + public string Language { get; set; } + public string License { get; set; } + public string Mediator { get; set; } + public string Medium { get; set; } + public string Modified { get; set; } + public string Provenance { get; set; } + public string Publisher { get; set; } + public string References { get; set; } + public string Relation { get; set; } + public string Replaces { get; set; } + public string Requires { get; set; } + public string Rights { get; set; } + public string RightsHolder { get; set; } + public string Source { get; set; } + public string Spatial { get; set; } + public string Subject { get; set; } + public string TableOfContents { get; set; } + public string Temporal { get; set; } + public string Title { get; set; } + public string Type { get; set; } + public string Valid { get; set; } + public string InstitutionID { get; set; } + public string CollectionID { get; set; } + public string DatasetID { get; set; } + public string InstitutionCode { get; set; } + public string CollectionCode { get; set; } + public string DatasetName { get; set; } + public string OwnerInstitutionCode { get; set; } + public string BasisOfRecord { get; set; } + public string InformationWithheld { get; set; } + public string DataGeneralizations { get; set; } + public string DynamicProperties { get; set; } + 
public string OccurrenceID { get; set; } + public string CatalogNumber { get; set; } + public string RecordNumber { get; set; } + public string RecordedBy { get; set; } + public int? IndividualCount { get; set; } + public string OrganismQuantity { get; set; } + public string OrganismQuantityType { get; set; } + public string Sex { get; set; } + public string LifeStage { get; set; } + public string ReproductiveCondition { get; set; } + public string Behavior { get; set; } + public string EstablishmentMeans { get; set; } + public string OccurrenceStatus { get; set; } + public string Preparations { get; set; } + public string Disposition { get; set; } + public string AssociatedReferences { get; set; } + public string AssociatedSequences { get; set; } + public string AssociatedTaxa { get; set; } + public string OtherCatalogNumbers { get; set; } + public string OccurrenceRemarks { get; set; } + public string OrganismID { get; set; } + public string OrganismName { get; set; } + public string OrganismScope { get; set; } + public string AssociatedOccurrences { get; set; } + public string AssociatedOrganisms { get; set; } + public string PreviousIdentifications { get; set; } + public string OrganismRemarks { get; set; } + public string MaterialSampleID { get; set; } + public string EventID { get; set; } + public string ParentEventID { get; set; } + public string FieldNumber { get; set; } + public string EventDate { get; set; } + public string EventTime { get; set; } + public int? StartDayOfYear { get; set; } + public int? EndDayOfYear { get; set; } + public int? Year { get; set; } + public int? Month { get; set; } + public int? Day { get; set; } + public string VerbatimEventDate { get; set; } + public string Habitat { get; set; } + public string SamplingProtocol { get; set; } + public string SamplingEffort { get; set; } + public int? 
SampleSizeValue { get; set; } + public string SampleSizeUnit { get; set; } + public string FieldNotes { get; set; } + public string EventRemarks { get; set; } + public string LocationID { get; set; } + public string HigherGeographyID { get; set; } + public string HigherGeography { get; set; } + public string Continent { get; set; } + public string WaterBody { get; set; } + public string IslandGroup { get; set; } + public string Island { get; set; } + public string CountryCode { get; set; } + public string StateProvince { get; set; } + public string County { get; set; } + public string Municipality { get; set; } + public string Locality { get; set; } + public string VerbatimLocality { get; set; } + public string VerbatimElevation { get; set; } + public string VerbatimDepth { get; set; } + public double? MinimumDistanceAboveSurfaceInMeters { get; set; } + public double? MaximumDistanceAboveSurfaceInMeters { get; set; } + public string LocationAccordingTo { get; set; } + public string LocationRemarks { get; set; } + public double? DecimalLatitude { get; set; } + public double? DecimalLongitude { get; set; } + public double? CoordinateUncertaintyInMeters { get; set; } + public double? CoordinatePrecision { get; set; } + public double? PointRadiusSpatialFit { get; set; } + public string VerbatimCoordinateSystem { get; set; } + public string VerbatimSRS { get; set; } + public string FootprintWKT { get; set; } + public string FootprintSRS { get; set; } + public double? 
FootprintSpatialFit { get; set; } + public string GeoreferencedBy { get; set; } + public string GeoreferencedDate { get; set; } + public string GeoreferenceProtocol { get; set; } + public string GeoreferenceSources { get; set; } + public string GeoreferenceVerificationStatus { get; set; } + public string GeoreferenceRemarks { get; set; } + public string GeologicalContextID { get; set; } + public string EarliestEonOrLowestEonothem { get; set; } + public string LatestEonOrHighestEonothem { get; set; } + public string EarliestEraOrLowestErathem { get; set; } + public string LatestEraOrHighestErathem { get; set; } + public string EarliestPeriodOrLowestSystem { get; set; } + public string LatestPeriodOrHighestSystem { get; set; } + public string EarliestEpochOrLowestSeries { get; set; } + public string LatestEpochOrHighestSeries { get; set; } + public string EarliestAgeOrLowestStage { get; set; } + public string LatestAgeOrHighestStage { get; set; } + public string LowestBiostratigraphicZone { get; set; } + public string HighestBiostratigraphicZone { get; set; } + public string LithostratigraphicTerms { get; set; } + public string Group { get; set; } + public string Formation { get; set; } + public string Member { get; set; } + public string Bed { get; set; } + public string IdentificationID { get; set; } + public string IdentificationQualifier { get; set; } + public string TypeStatus { get; set; } + public string IdentifiedBy { get; set; } + public DateTime? 
DateIdentified { get; set; } + public string IdentificationReferences { get; set; } + public string IdentificationVerificationStatus { get; set; } + public string IdentificationRemarks { get; set; } + public string TaxonID { get; set; } + public string ScientificNameID { get; set; } + public string AcceptedNameUsageID { get; set; } + public string ParentNameUsageID { get; set; } + public string OriginalNameUsageID { get; set; } + public string NameAccordingToID { get; set; } + public string NamePublishedInID { get; set; } + public string TaxonConceptID { get; set; } + public string ScientificName { get; set; } + public string AcceptedNameUsage { get; set; } + public string ParentNameUsage { get; set; } + public string OriginalNameUsage { get; set; } + public string NameAccordingTo { get; set; } + public string NamePublishedIn { get; set; } + public int? NamePublishedInYear { get; set; } + public string HigherClassification { get; set; } + public string Kingdom { get; set; } + public string Phylum { get; set; } + public string Class { get; set; } + public string Order { get; set; } + public string Family { get; set; } + public string Genus { get; set; } + public string Subgenus { get; set; } + public string SpecificEpithet { get; set; } + public string InfraspecificEpithet { get; set; } + public string TaxonRank { get; set; } + public string VerbatimTaxonRank { get; set; } + public string VernacularName { get; set; } + public string NomenclaturalCode { get; set; } + public string TaxonomicStatus { get; set; } + public string NomenclaturalStatus { get; set; } + public string TaxonRemarks { get; set; } + public string DatasetKey { get; set; } + public string PublishingCountry { get; set; } + public string LastInterpreted { get; set; } + public string Elevation { get; set; } + public string ElevationAccuracy { get; set; } + public string Depth { get; set; } + public string DepthAccuracy { get; set; } + public string DistanceAboveSurface { get; set; } + public string 
DistanceAboveSurfaceAccuracy { get; set; } + public string Issue { get; set; } + public string MediaType { get; set; } + public string HasCoordinate { get; set; } + public string HasGeospatialIssues { get; set; } + public string TaxonKey { get; set; } + public string AcceptedTaxonKey { get; set; } + public string KingdomKey { get; set; } + public string PhylumKey { get; set; } + public string ClassKey { get; set; } + public string OrderKey { get; set; } + public string FamilyKey { get; set; } + public string GenusKey { get; set; } + public string SubgenusKey { get; set; } + public string SpeciesKey { get; set; } + public string Species { get; set; } + public string GenericName { get; set; } + public string AcceptedScientificName { get; set; } + public string VerbatimScientificName { get; set; } + public string TypifiedName { get; set; } + public string Protocol { get; set; } + public string LastParsed { get; set; } + public string LastCrawled { get; set; } + public string Repatriated { get; set; } + public string RelativeOrganismQuantity { get; set; } + public string RecordedByID { get; set; } + public string IdentifiedByID { get; set; } + public string Level0Gid { get; set; } + public string Level0Name { get; set; } + public string Level1Gid { get; set; } + public string Level1Name { get; set; } + public string Level2Gid { get; set; } + public string Level2Name { get; set; } + public string Level3Gid { get; set; } + public string Level3Name { get; set; } + public string IucnRedListCategory { get; set; } +} \ No newline at end of file diff --git a/notebooks/Code/Verbatim.cs b/notebooks/Code/Verbatim.cs new file mode 100644 index 0000000..5f45547 --- /dev/null +++ b/notebooks/Code/Verbatim.cs @@ -0,0 +1,222 @@ +/* Verbatim: plain data holder made up solely of public get/set auto-properties, starting with GbifID. Most members are string; a handful are nullable value types (int?, double?, DateTime?). Declared partial, so additional members may be supplied from another file. NOTE(review): presumably one property per column of the archive's verbatim data file - confirm against the generator. NOTE(review): typing of sibling members is uneven (MinimumElevationInMeters is double? while MaximumElevationInMeters is string; MinimumDepthInMeters is string while MaximumDepthInMeters is double?) - looks inferred from sample data; verify before relying on the numeric types. */ public partial class Verbatim +{ + public string GbifID { get; set; } + public string Abstract { get; set; } + public string AccessRights { get; set; } + public string AccrualMethod { get; set; } + public string AccrualPeriodicity { get; set; } + public 
string AccrualPolicy { get; set; } + public string Alternative { get; set; } + public string Audience { get; set; } + public string Available { get; set; } + public string BibliographicCitation { get; set; } + public string ConformsTo { get; set; } + public string Contributor { get; set; } + public string Coverage { get; set; } + public string Created { get; set; } + public string Creator { get; set; } + public string Date { get; set; } + public string DateAccepted { get; set; } + public string DateCopyrighted { get; set; } + public string DateSubmitted { get; set; } + public string Description { get; set; } + public string EducationLevel { get; set; } + public string Extent { get; set; } + public string Format { get; set; } + public string HasFormat { get; set; } + public string HasPart { get; set; } + public string HasVersion { get; set; } + public string Identifier { get; set; } + public string InstructionalMethod { get; set; } + public string IsFormatOf { get; set; } + public string IsPartOf { get; set; } + public string IsReferencedBy { get; set; } + public string IsReplacedBy { get; set; } + public string IsRequiredBy { get; set; } + public string IsVersionOf { get; set; } + public string Issued { get; set; } + public string Language { get; set; } + public string License { get; set; } + public string Mediator { get; set; } + public string Medium { get; set; } + public string Modified { get; set; } + public string Provenance { get; set; } + public string Publisher { get; set; } + public string References { get; set; } + public string Relation { get; set; } + public string Replaces { get; set; } + public string Requires { get; set; } + public string Rights { get; set; } + public string RightsHolder { get; set; } + public string Source { get; set; } + public string Spatial { get; set; } + public string Subject { get; set; } + public string TableOfContents { get; set; } + public string Temporal { get; set; } + public string Title { get; set; } + public string 
Type { get; set; } + public string Valid { get; set; } + public string InstitutionID { get; set; } + public string CollectionID { get; set; } + public string DatasetID { get; set; } + public string InstitutionCode { get; set; } + public string CollectionCode { get; set; } + public string DatasetName { get; set; } + public string OwnerInstitutionCode { get; set; } + public string BasisOfRecord { get; set; } + public string InformationWithheld { get; set; } + public string DataGeneralizations { get; set; } + public string DynamicProperties { get; set; } + public string OccurrenceID { get; set; } + public string CatalogNumber { get; set; } + public string RecordNumber { get; set; } + public string RecordedBy { get; set; } + public int? IndividualCount { get; set; } + public string OrganismQuantity { get; set; } + public string OrganismQuantityType { get; set; } + public string Sex { get; set; } + public string LifeStage { get; set; } + public string ReproductiveCondition { get; set; } + public string Behavior { get; set; } + public string EstablishmentMeans { get; set; } + public string OccurrenceStatus { get; set; } + public string Preparations { get; set; } + public string Disposition { get; set; } + public string AssociatedMedia { get; set; } + public string AssociatedReferences { get; set; } + public string AssociatedSequences { get; set; } + public string AssociatedTaxa { get; set; } + public string OtherCatalogNumbers { get; set; } + public string OccurrenceRemarks { get; set; } + public string OrganismID { get; set; } + public string OrganismName { get; set; } + public string OrganismScope { get; set; } + public string AssociatedOccurrences { get; set; } + public string AssociatedOrganisms { get; set; } + public string PreviousIdentifications { get; set; } + public string OrganismRemarks { get; set; } + public string MaterialSampleID { get; set; } + public string EventID { get; set; } + public string ParentEventID { get; set; } + public string FieldNumber { 
get; set; } + public string EventDate { get; set; } + public string EventTime { get; set; } + public int? StartDayOfYear { get; set; } + public int? EndDayOfYear { get; set; } + public int? Year { get; set; } + public int? Month { get; set; } + public int? Day { get; set; } + public string VerbatimEventDate { get; set; } + public string Habitat { get; set; } + public string SamplingProtocol { get; set; } + public string SamplingEffort { get; set; } + public int? SampleSizeValue { get; set; } + public string SampleSizeUnit { get; set; } + public string FieldNotes { get; set; } + public string EventRemarks { get; set; } + public string LocationID { get; set; } + public string HigherGeographyID { get; set; } + public string HigherGeography { get; set; } + public string Continent { get; set; } + public string WaterBody { get; set; } + public string IslandGroup { get; set; } + public string Island { get; set; } + public string Country { get; set; } + public string CountryCode { get; set; } + public string StateProvince { get; set; } + public string County { get; set; } + public string Municipality { get; set; } + public string Locality { get; set; } + public string VerbatimLocality { get; set; } + public double? MinimumElevationInMeters { get; set; } + public string MaximumElevationInMeters { get; set; } + public string VerbatimElevation { get; set; } + public string MinimumDepthInMeters { get; set; } + public double? MaximumDepthInMeters { get; set; } + public string VerbatimDepth { get; set; } + public double? MinimumDistanceAboveSurfaceInMeters { get; set; } + public double? MaximumDistanceAboveSurfaceInMeters { get; set; } + public string LocationAccordingTo { get; set; } + public string LocationRemarks { get; set; } + public double? DecimalLatitude { get; set; } + public double? DecimalLongitude { get; set; } + public string GeodeticDatum { get; set; } + public double? CoordinateUncertaintyInMeters { get; set; } + public double? 
CoordinatePrecision { get; set; } + public double? PointRadiusSpatialFit { get; set; } + public string VerbatimCoordinates { get; set; } + public string VerbatimLatitude { get; set; } + public string VerbatimLongitude { get; set; } + public string VerbatimCoordinateSystem { get; set; } + public string VerbatimSRS { get; set; } + public string FootprintWKT { get; set; } + public string FootprintSRS { get; set; } + public double? FootprintSpatialFit { get; set; } + public string GeoreferencedBy { get; set; } + public string GeoreferencedDate { get; set; } + public string GeoreferenceProtocol { get; set; } + public string GeoreferenceSources { get; set; } + public string GeoreferenceVerificationStatus { get; set; } + public string GeoreferenceRemarks { get; set; } + public string GeologicalContextID { get; set; } + public string EarliestEonOrLowestEonothem { get; set; } + public string LatestEonOrHighestEonothem { get; set; } + public string EarliestEraOrLowestErathem { get; set; } + public string LatestEraOrHighestErathem { get; set; } + public string EarliestPeriodOrLowestSystem { get; set; } + public string LatestPeriodOrHighestSystem { get; set; } + public string EarliestEpochOrLowestSeries { get; set; } + public string LatestEpochOrHighestSeries { get; set; } + public string EarliestAgeOrLowestStage { get; set; } + public string LatestAgeOrHighestStage { get; set; } + public string LowestBiostratigraphicZone { get; set; } + public string HighestBiostratigraphicZone { get; set; } + public string LithostratigraphicTerms { get; set; } + public string Group { get; set; } + public string Formation { get; set; } + public string Member { get; set; } + public string Bed { get; set; } + public string IdentificationID { get; set; } + public string IdentificationQualifier { get; set; } + public string TypeStatus { get; set; } + public string IdentifiedBy { get; set; } + public DateTime? 
DateIdentified { get; set; } + public string IdentificationReferences { get; set; } + public string IdentificationVerificationStatus { get; set; } + public string IdentificationRemarks { get; set; } + public string TaxonID { get; set; } + public string ScientificNameID { get; set; } + public string AcceptedNameUsageID { get; set; } + public string ParentNameUsageID { get; set; } + public string OriginalNameUsageID { get; set; } + public string NameAccordingToID { get; set; } + public string NamePublishedInID { get; set; } + public string TaxonConceptID { get; set; } + public string ScientificName { get; set; } + public string AcceptedNameUsage { get; set; } + public string ParentNameUsage { get; set; } + public string OriginalNameUsage { get; set; } + public string NameAccordingTo { get; set; } + public string NamePublishedIn { get; set; } + public int? NamePublishedInYear { get; set; } + public string HigherClassification { get; set; } + public string Kingdom { get; set; } + public string Phylum { get; set; } + public string Class { get; set; } + public string Order { get; set; } + public string Family { get; set; } + public string Genus { get; set; } + public string Subgenus { get; set; } + public string SpecificEpithet { get; set; } + public string InfraspecificEpithet { get; set; } + public string TaxonRank { get; set; } + public string VerbatimTaxonRank { get; set; } + public string ScientificNameAuthorship { get; set; } + public string VernacularName { get; set; } + public string NomenclaturalCode { get; set; } + public string TaxonomicStatus { get; set; } + public string NomenclaturalStatus { get; set; } + public string TaxonRemarks { get; set; } + public string RecordedByID { get; set; } + public string IdentifiedByID { get; set; } +} \ No newline at end of file diff --git a/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj b/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj index 22a6609..beb8f1e 100644 --- 
a/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj +++ b/src/DwC-A_dotnet.Interactive/DwC-A_dotnet.Interactive.csproj @@ -11,20 +11,20 @@ DwC-A darwin-core Biodiversity Paul Joiner Paul Joiner - 0.1.7-Pre + 0.1.8-Pre .NET Interactive Extensions for Darwin Core Archive file reader - 0.1.7.0 - 0.1.7.0 + 0.1.8.0 + 0.1.8.0 LICENSE README.md - + Core - + @@ -39,6 +39,6 @@ True - + diff --git a/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs b/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs index 8edcccc..5ba5916 100644 --- a/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs +++ b/src/DwC-A_dotnet.Interactive/DwCKernelExtension.cs @@ -15,6 +15,7 @@ public class DwCKernelExtension : IKernelExtension { public Task OnLoadAsync(Kernel kernel) { + Formatter.Register(ArchiveMetaData.RegisterForArchiveReader, "text/html"); Formatter.Register(ArchiveMetaData.Register, "text/html"); Formatter.Register(FileReaderMetaData.Register, "text/html"); Formatter.Register(TermsFormatter.Register, "text/html"); diff --git a/src/DwC-A_dotnet.Interactive/Formatters/ArchiveMetaData.cs b/src/DwC-A_dotnet.Interactive/Formatters/ArchiveMetaData.cs index 6e0b883..eb88543 100644 --- a/src/DwC-A_dotnet.Interactive/Formatters/ArchiveMetaData.cs +++ b/src/DwC-A_dotnet.Interactive/Formatters/ArchiveMetaData.cs @@ -3,6 +3,7 @@ using System.Linq; using System.IO; using Core.DwC_A.Meta; +using Core.DwC_A; using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags; @@ -10,6 +11,11 @@ namespace DwC_A.Interactive.Formatters { internal class ArchiveMetaData { + /* Formatter callback for an ArchiveReader: forwards archiveReader.MetaData and the supplied writer to Register(Archive, TextWriter), so a reader is rendered the same way as its metadata. archiveReader is dereferenced without a null check. */ public static void RegisterForArchiveReader(ArchiveReader archiveReader, TextWriter writer) + { + Register(archiveReader.MetaData, writer); + } + public static void Register(Archive archive, TextWriter writer) { var header = tr(new[]