From 0b10e0760295bbb373d949ed3165c8d1bb973811 Mon Sep 17 00:00:00 2001 From: Vanessa Sochat Date: Sat, 4 Aug 2018 08:59:28 -0700 Subject: [PATCH] adding paper.md --- paper/paper.bib | 124 ++++++++++++++++++++++++++++++++++++++++++++++++ paper/paper.md | 43 +++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 paper/paper.bib create mode 100644 paper/paper.md diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..a492b30 --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,124 @@ +@MISC{Docker-hub, + title = "Docker Hub", + howpublished = "\url{https://hub.docker.com/}", + note = "Accessed: 2015-11-4" +} + + +@MISC{noauthor_undated-vt, + title = "{GitHub} Pages", + abstract = "Websites for you and your projects, hosted directly from your + GitHub repository. Just edit, push, and your changes are live.", + institution = "Github" +} + +@ARTICLE{Ram2013-km, + title = "Git can facilitate greater reproducibility and increased + transparency in science", + author = "Ram, Karthik", + affiliation = "Environmental Science, Policy, and Management, University of + California, Berkeley, Berkeley, CA 94720, USA. + karthik.ram@berkeley.edu.", + abstract = "BACKGROUND: Reproducibility is the hallmark of good science. + Maintaining a high degree of transparency in scientific + reporting is essential not just for gaining trust and + credibility within the scientific community but also for + facilitating the development of new ideas. Sharing data and + computer code associated with publications is becoming + increasingly common, motivated partly in response to data + deposition requirements from journals and mandates from + funders. Despite this increase in transparency, it is still + difficult to reproduce or build upon the findings of most + scientific publications without access to a more complete + workflow. 
FINDINGS: Version control systems (VCS), which have + long been used to maintain code repositories in the software + industry, are now finding new applications in science. One + such open source VCS, Git, provides a lightweight yet robust + framework that is ideal for managing the full suite of + research outputs such as datasets, statistical code, figures, + lab notes, and manuscripts. For individual researchers, Git + provides a powerful way to track and compare versions, retrace + errors, explore new approaches in a structured manner, while + maintaining a full audit trail. For larger collaborative + efforts, Git and Git hosting services make it possible for + everyone to work asynchronously and merge their contributions + at any time, all the while maintaining a complete authorship + trail. In this paper I provide an overview of Git along with + use-cases that highlight how this tool can be leveraged to + make science more reproducible and transparent, foster new + collaborations, and support novel uses.", + journal = "Source Code Biol. Med.", + volume = 8, + number = 1, + pages = "7", + month = feb, + year = 2013, + language = "en" +} + +@ARTICLE{Sochat2017-ud, + title = "Enhancing reproducibility in scientific computing: Metrics and + registry for Singularity containers", + author = "Sochat, Vanessa V and Prybol, Cameron J and Kurtzer, Gregory M", + abstract = "Here we present Singularity Hub, a framework to build and deploy + Singularity containers for mobility of compute, and the + singularity-python software with novel metrics for assessing + reproducibility of such containers. Singularity containers make + it possible for scientists and developers to package reproducible + software, and Singularity Hub adds automation to this workflow by + building, capturing metadata for, visualizing, and serving + containers programmatically. 
Our novel metrics, based on custom + filters of content hashes of container contents, allow for + comparison of an entire container, including operating system, + custom software, and metadata. First we will review Singularity + Hub's primary use cases and how the infrastructure has been + designed to support modern, common workflows. Next, we conduct + three analyses to demonstrate build consistency, reproducibility + metric and performance and interpretability, and potential for + discovery. This is the first effort to demonstrate a rigorous + assessment of measurable similarity between containers and + operating systems. We provide these capabilities within + Singularity Hub, as well as the source software + singularity-python that provides the underlying functionality. + Singularity Hub is available at https://singularity-hub.org, and + we are excited to provide it as an openly available platform for + building, and deploying scientific containers.", + journal = "PLoS One", + volume = 12, + number = 11, + pages = "e0188511", + month = nov, + year = 2017, + language = "en" +} + + +@ARTICLE{Merkel2014-da, + title = "Docker: Lightweight Linux Containers for Consistent Development + and Deployment", + author = "Merkel, Dirk", + journal = "Linux J.", + publisher = "Belltown Media", + volume = 2014, + number = 239, + month = mar, + year = 2014, + address = "Houston, TX" +} + + +@MISC{noauthor_undated-hl, + title = "container-diff", + abstract = "container-diff: Diff your Docker containers", + institution = "Github" +} + + +@MISC{noauthor_undated-nt, + title = "Continuous Integration and Deployment", + abstract = "Hosted Continuous Integration for web applications. 
Set up + your application for testing in one click, on the fastest + testing platform on the internet.", + howpublished = "\url{https://circleci.com/dashboard}", + note = "Accessed: 2018-8-4" +} diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..bcebe31 --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,43 @@ +--- +title: 'Containershare: Open Source Registry to build, test, deploy with CircleCI' +tags: + - containers + - continuous integration + - circleci + - docker + - api + - reproducibility +authors: + - name: Vanessa Sochat + orcid: 0000-0002-4387-3819 + affiliation: 1 +affiliations: + - name: Stanford University Research Computing + index: 1 +date: 04 August 2018 +bibliography: paper.bib +--- + +# Summary + +Containershare is an open source library of containers, providing itself as a [template](https://www.github.com/vsoch/containershare), a [library](https://vsoch.github.io/containershare), and a production [application programming interface](https://vsoch.github.io/containershare/library.json) (API) for interested users. Specifically, it is a complete metadata registry that can be freely deployed directly from a GitHub repository to validate and serve tested, tagged, and version controlled containers, each maintained from independent GitHub repositories. The registry uses several open source and free to use solutions to accomplish this, and brings them together programmatically with steps that are easy for the user to set up. Specifically, the user must connect the repository to the continuous integration service CircleCI [@noauthor_undated-nt] and then turn on GitHub Pages [@noauthor_undated-vt] from the repository web interface. After these steps, adding text files to describe other container repositories via pull requests will test the submissions, and programmatically add them to all components of the library. 
+ +![https://vsoch.github.io/assets/images/posts/containershare/table.png](https://vsoch.github.io/assets/images/posts/containershare/table.png) + +## Container Repository Templates + +The individual container repository is responsible for serving its own metadata that the containershare can discover, validate, and provide for the user. It is also driven by a simple definition of a general CircleCI workflow, including building of a container, extraction of metadata, and then deployment of the container to Docker Hub and metadata files and user interface back to GitHub Pages. The workflow definitions to accomplish this are provided by containershare, and include: + + - A table of version controlled containers, where each commit is associated with a tagged container for the user to interact with + +![https://vsoch.github.io/assets/images/posts/containershare/share.png](https://vsoch.github.io/assets/images/posts/containershare/share.png) + + - A complete list of tags for a program to discover and consistently query for the associated container metadata + - The traditional image manifests that are provided by Docker Hub + - An "inspection" of the container that includes package manager packages and versions, along with a listing of files and sizes inside of the container, extracted during the build step using the open source Container Diff [@noauthor_undated-hl] tool provided by Google Open Source. + + ![https://vsoch.github.io/assets/images/posts/containershare/inspect.png](https://vsoch.github.io/assets/images/posts/containershare/inspect.png) + +Containershare serves as a general skeleton that can be extended to several use cases and themes, including sharing of containers for behavioral paradigms, open source publications, or components of scientific workflows. The use of open source tools like GitHub and CircleCI means that the implementation is completely transparent and customizable for individual or institutional needs. 
For a research scientist, submission of a container repository to a containershare gives confidence that the container can be discovered. For a service provider, deployment of a containershare (and subsequent provision of containers using it) gives confidence that the service users have a central location to discover containers, and an API to discover them programmatically. Containershare and links to container repository templates are provided for use from [the containershare repository](https://www.github.com/vsoch/containershare). Contributions in the form of code or issues are encouraged, and for interested readers, a more descriptive writeup [is available](https://vsoch.github.io/2018/build-deploy-docs/). + +# References