Skip to content

Commit

Permalink
Merge pull request #256 from refgenie/dev
Browse files Browse the repository at this point in the history
v0.12.0
  • Loading branch information
stolarczyk committed Jun 28, 2021
2 parents fb1337d + 7e56c5f commit dae54c4
Show file tree
Hide file tree
Showing 45 changed files with 1,044 additions and 313 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/black.yml
@@ -1,11 +1,11 @@
name: Lint

on: [push, pull_request]
on: [push]

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: psf/black@20.8b1
- uses: psf/black@stable
6 changes: 3 additions & 3 deletions .github/workflows/build-package.yml
Expand Up @@ -2,16 +2,16 @@ name: Build package

on:
push:
branches: [master, dev]
branches: [master]
pull_request:
branches: [master, dev]
branches: [master]

jobs:
build-package:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
python-version: [3.6, 3.9]
os: [ubuntu-latest, macos-latest]

steps:
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/test-refgenie-cli.yml
Expand Up @@ -2,7 +2,9 @@ name: Test refgenie CLI

on:
push:
branches: [master, dev]
branches: [master]
pull_request:
branches: [master]

jobs:
test_CLI:
Expand Down Expand Up @@ -152,3 +154,8 @@ jobs:
echo "Error: seek and populate returned different paths -- seek: ${seek_path}; populate: ${populate_path}"
exit 1
fi
- name: refgenie build --pull-parents
run: |
refgenie build rCRSd/fasta_child -c genomes/g.yaml --pull-parent --recipe tests/data/recipe_child.json
./tests/assert_in_file.sh genomes/g.yaml rCRSd 0 # rCRSd should be initialized because fasta had to be pulled
16 changes: 8 additions & 8 deletions .gitignore
Expand Up @@ -12,16 +12,16 @@ tests/test/*
# generic ignore list:
*.lst

# Compiled source
# Compiled source
*.com
*.class
*.dll
*.exe
*.o
*.so
*.pyc
# Packages

# Packages
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
Expand All @@ -32,13 +32,13 @@ tests/test/*
*.rar
*.tar
*.zip
# Logs and databases

# Logs and databases
*.log
*.sql
*.sqlite
# OS generated files

# OS generated files
.DS_Store
.DS_Store?
._*
Expand All @@ -47,7 +47,7 @@ tests/test/*
ehthumbs.db
Thumbs.db

# Gedit temporary files
# Gedit temporary files
*~

# libreoffice lock files:
Expand Down
7 changes: 4 additions & 3 deletions .pre-commit-config.yaml
@@ -1,20 +1,21 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
rev: v4.0.1
hooks:
- id: trailing-whitespace
- id: check-yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: trailing-whitespace
- id: check-ast

- repo: https://github.com/PyCQA/isort
rev: 5.7.0
rev: 5.9.1
hooks:
- id: isort
args: ["--profile", "black"]

- repo: https://github.com/psf/black
rev: 20.8b1
rev: 21.6b0
hooks:
- id: black
75 changes: 38 additions & 37 deletions containers/Dockerfile_refgenie
Expand Up @@ -6,62 +6,63 @@ FROM ubuntu:18.04
MAINTAINER Nathan Sheffield <nathan@code.databio.org>

# Updates and dependencies
RUN apt-get update && apt-get install -y wget git unzip
RUN apt-get install -y python python-pip
RUN apt-get install -y curl
RUN apt-get update && apt-get install -y wget git unzip
RUN apt-get install -y python python-pip
RUN apt-get install -y curl

# htslib 1.9 (tabix cmd)
RUN apt-get install -y libz-dev libncurses-dev
RUN wget -O ~/htslib.tar.gz https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && tar xjf ~/htslib.tar.gz && cd htslib-1.9 && ./configure && make && make install
ENV PATH="/htslib-1.9:${PATH}"
RUN apt-get install -y libz-dev libncurses-dev
RUN apt-get install -y libbz2-dev liblzma-dev
RUN wget -O ~/htslib.tar.gz https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && tar xjf ~/htslib.tar.gz && cd htslib-1.9 && ./configure && make && make install
ENV PATH="/htslib-1.9:${PATH}"

# install samtools 1.3.1
RUN wget -O ~/samtools.bz2 https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2
RUN tar -xf ~/samtools.bz2
RUN cd /samtools-1.3.1 && make
ENV PATH="/samtools-1.3.1:${PATH}"
RUN wget -O ~/samtools.bz2 https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2
RUN tar -xf ~/samtools.bz2
RUN cd /samtools-1.3.1 && make
ENV PATH="/samtools-1.3.1:${PATH}"

# bowtie2 2.3.0 and deps
RUN apt-get install -y libtbb-dev # bowtie2 dependencies
RUN wget -O ~/bowtie.zip "https://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.3.0/bowtie2-2.3.0-linux-x86_64.zip?r=https%3A%2F%2Fsourceforge.net%2Fprojects%2Fbowtie-bio%2Ffiles%2Fbowtie2%2F2.3.0%2F&ts=1485465820&use_mirror=kent"
RUN unzip ~/bowtie.zip
ENV PATH="/bowtie2-2.3.0:${PATH}"
RUN apt-get install -y libtbb-dev # bowtie2 dependencies
RUN wget -O ~/bowtie.zip "https://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.3.0/bowtie2-2.3.0-linux-x86_64.zip?r=https%3A%2F%2Fsourceforge.net%2Fprojects%2Fbowtie-bio%2Ffiles%2Fbowtie2%2F2.3.0%2F&ts=1485465820&use_mirror=kent"
RUN unzip ~/bowtie.zip
ENV PATH="/bowtie2-2.3.0:${PATH}"

# Bismark Methylation caller 0.17.0
RUN wget -O ~/bismark.zip https://github.com/FelixKrueger/Bismark/archive/0.17.0.zip
RUN unzip ~/bismark.zip
ENV PATH="/Bismark-0.17.0:${PATH}"
RUN wget -O ~/bismark.zip https://github.com/FelixKrueger/Bismark/archive/0.17.0.zip
RUN unzip ~/bismark.zip
ENV PATH="/Bismark-0.17.0:${PATH}"

# hisat2 2.0.5
RUN wget -O ~/hisat.zip ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.0.5-source.zip
RUN unzip ~/hisat.zip
RUN cd hisat2-2.0.5 && make
ENV PATH="/hisat2-2.0.5:${PATH}"
RUN wget -O ~/hisat.zip ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.0.5-source.zip
RUN unzip ~/hisat.zip
RUN cd hisat2-2.0.5 && make
ENV PATH="/hisat2-2.0.5:${PATH}"

# kallisto
RUN wget -O ~/kallisto.tar.gz https://github.com/pachterlab/kallisto/releases/download/v0.43.0/kallisto_linux-v0.43.0.tar.gz
RUN tar -xf ~/kallisto.tar.gz
ENV PATH="/kallisto_linux-v0.43.0:${PATH}"
RUN wget -O ~/kallisto.tar.gz https://github.com/pachterlab/kallisto/releases/download/v0.43.0/kallisto_linux-v0.43.0.tar.gz
RUN tar -xf ~/kallisto.tar.gz
ENV PATH="/kallisto_linux-v0.43.0:${PATH}"

# epilog meth calling
# TODO: Use public version after publication.
#RUN wget -O epilog_indexer.py https://github.com/databio/epilog/blob/master/epilog/epilog_indexer.py
ADD includes/epilog_indexer.py bin/epilog_indexer.py
RUN pip install --user regex
# TODO: Use public version after publication.
#RUN wget -O epilog_indexer.py https://github.com/databio/epilog/blob/master/epilog/epilog_indexer.py
ADD includes/epilog_indexer.py bin/epilog_indexer.py
RUN pip install --user regex

# UCSC twoBitToFa
ADD includes/twoBitToFa bin/twoBitToFa
RUN apt-get install -y libpng-dev
ADD includes/twoBitToFa bin/twoBitToFa
RUN apt-get install -y libpng-dev

# bwa 0.7.17
RUN wget -O ~/bwa-0.7.17.tar.bz2 https://github.com/lh3/bwa/releases/download/v0.7.17/bwa-0.7.17.tar.bz2
RUN tar -xf ~/bwa-0.7.17.tar.bz2
run cd /bwa-0.7.17 && make
ENV PATH="/bwa-0.7.17:${PATH}"
RUN wget -O ~/bwa-0.7.17.tar.bz2 https://github.com/lh3/bwa/releases/download/v0.7.17/bwa-0.7.17.tar.bz2
RUN tar -xf ~/bwa-0.7.17.tar.bz2
RUN cd /bwa-0.7.17 && make
ENV PATH="/bwa-0.7.17:${PATH}"

# STAR 2.7.1a
RUN wget -O ~/STAR.tar.gz https://github.com/alexdobin/STAR/archive/2.7.1a.tar.gz && tar -xf ~/STAR.tar.gz && cd STAR-2.7.1a/source && make STAR
ENV PATH="/STAR-2.7.1a/source:${PATH}"
RUN wget -O ~/STAR.tar.gz https://github.com/alexdobin/STAR/archive/2.7.1a.tar.gz && tar -xf ~/STAR.tar.gz && cd STAR-2.7.1a/source && make STAR
ENV PATH="/STAR-2.7.1a/source:${PATH}"

# GenomeTools 1.5.10
RUN apt-get install -y genometools
RUN apt-get install -y genometools
2 changes: 1 addition & 1 deletion containers/Makefile
Expand Up @@ -2,4 +2,4 @@ refgenie:
mkdir -p includes
wget https://github.com/nsheff/docker/raw/master/includes/twoBitToFa -P includes
wget https://github.com/nsheff/docker/blob/master/includes/epilog_indexer.py -P includes
docker build -t databio/refgenie -f Dockerfile_refgenie .
docker build -t databio/refgenie -f Dockerfile_refgenie .
8 changes: 4 additions & 4 deletions docs/asset_registry_paths.md
@@ -1,9 +1,9 @@
# Asset registry paths

Each asset is defined by four components:

1. genome name
2. asset name
2. asset name
3. tag name
4. seek key

Expand Down Expand Up @@ -38,7 +38,7 @@ How did it work?
Alternatively, you can specify all of these namespace components as command line arguments:

```console
refgenie seek -g rCRSd -a fasta -t default
refgenie seek -g rCRSd -a fasta -t default
```

One advantage of this approach is that it allows you to refer to multiple assets belonging to the same genome.
One advantage of this approach is that it allows you to refer to multiple assets belonging to the same genome.

0 comments on commit dae54c4

Please sign in to comment.