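# Gather data
#
# NOTE(review): the text originally here ("Skip to content", "Gather data",
# "Gather data #19") looks like page text captured from the GitHub web UI
# rather than workflow content. It is preserved in this comment so that it is
# not read as YAML; confirm and drop it if that is right.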

# Rebuilds data/input.csv from a freshly downloaded Majestic Million CSV,
# runs scripts/gather_data.py, and commits the resulting changes back to the
# repository.
name: Gather data

on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  # 00:00 on the 1st day of every month (GitHub evaluates cron in UTC).
  schedule:
    - cron: '0 0 1 * *'

# The last step pushes a commit, so the job token needs write access to the
# repository contents. Everything else stays at no access.
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted on purpose: an unquoted 3.10 is parsed as the float 3.1.
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Generate the input CSV file from a fresh Majestic CSV file
        run: |
          wget https://downloads.majestic.com/majestic_million.csv
          # Skip the header row (NR>1) and keep only columns 1 and 3.
          awk -F "," 'NR>1 {print $1 "," $3}' majestic_million.csv > data/input.csv
          rm majestic_million.csv

      - name: Run data gathering script
        run: |
          cd scripts
          python gather_data.py
          # Log the size of the produced database (also fails the step if the
          # file is missing).
          ls -l --block-size=M ../data/data.db

      - name: Set up Git user
        run: |
          git config --global user.email "gha@github.com"
          git config --global user.name "GHActionBot"

      - name: Commit update
        run: |
          # Stage modifications to tracked files only, the same scope that
          # `git commit -a` would pick up.
          git add -u
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the step; skip commit and push when there is nothing new.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Sync data DB and corresponding input CSV file"
            git push
          fi