Skip to content

feat: hack 2024 ingest all the logs #81289

feat: hack 2024 ingest all the logs

feat: hack 2024 ingest all the logs #81289

Workflow file for this run

# This workflow runs all of our backend django tests.
#
# If these tests get too slow, look at increasing concurrency and re-timing the tests by manually dispatching
# .github/workflows/ci-backend-update-test-timing.yml action
name: Backend CI
on:
push:
branches:
- master
pull_request:
workflow_dispatch:
inputs:
clickhouseServerVersion:
description: ClickHouse server version. Leave blank for default
type: string
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
# This is so that the workflow run isn't canceled when a snapshot update is pushed within it by posthog-bot
# We do however cancel from container-images-ci.yml if a commit is pushed by someone OTHER than posthog-bot
cancel-in-progress: false
env:
SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only
DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog'
REDIS_URL: 'redis://localhost'
CLICKHOUSE_HOST: 'localhost'
CLICKHOUSE_SECURE: 'False'
CLICKHOUSE_VERIFY: 'False'
TEST: 1
CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }}
OBJECT_STORAGE_ENABLED: 'True'
OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000'
OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user'
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password'
jobs:
# Job to decide if we should run backend ci
# See https://github.com/dorny/paths-filter#conditional-execution for more details
changes:
runs-on: ubuntu-latest
timeout-minutes: 5
if: github.repository == 'PostHog/posthog'
name: Determine need to run backend checks
# Set job outputs to values from filter step
outputs:
backend: ${{ steps.filter.outputs.backend }}
steps:
# For pull requests it's not necessary to checkout the code, but we
# also want this to run on master so we need to checkout
- uses: actions/checkout@v3
- uses: dorny/paths-filter@v2
id: filter
with:
filters: |
backend:
# Avoid running backend tests for irrelevant changes
# NOTE: we are at risk of missing a dependency here. We could make
# the dependencies more clear if we separated the backend/frontend
# code completely
# really we should ignore ee/frontend/** but dorny doesn't support that
# - '!ee/frontend/**'
# including the negated rule appears to work
# but makes it always match because the checked file always isn't `ee/frontend/**` 🙈
- 'ee/**/*'
- 'posthog/**/*'
- 'bin/*.py'
- requirements.txt
- requirements-dev.txt
- mypy.ini
- pytest.ini
- frontend/src/queries/schema.json # Used for generating schema.py
- plugin-transpiler/src # Used for transpiling plugins
# Make sure we run if someone is explicitly change the workflow
- .github/workflows/ci-backend.yml
- .github/actions/run-backend-tests/action.yml
# We use docker compose for tests, make sure we rerun on
# changes to docker-compose.dev.yml e.g. dependency
# version changes
- docker-compose.dev.yml
- frontend/public/email/*
# These scripts are used in the CI
- bin/check_temporal_up
- bin/check_kafka_clickhouse_up
backend-code-quality:
needs: changes
timeout-minutes: 30
name: Python code quality checks
runs-on: ubuntu-latest
steps:
# If this run wasn't initiated by the bot (meaning: snapshot update) and we've determined
# there are backend changes, cancel previous runs
- uses: n1hility/cancel-previous-runs@v3
if: github.actor != 'posthog-bot' && needs.changes.outputs.backend == 'true'
with:
token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v3
with:
fetch-depth: 1
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.10.10
cache: 'pip'
cache-dependency-path: '**/requirements*.txt'
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}
# uv is a fast pip alternative: https://github.com/astral-sh/uv/
- run: pip install uv
- name: Install SAML (python3-saml) dependencies
run: |
sudo apt-get update
sudo apt-get install libxml2-dev libxmlsec1 libxmlsec1-dev libxmlsec1-openssl
- name: Install Python dependencies
run: |
uv pip install --system -r requirements.txt -r requirements-dev.txt
- name: Check for syntax errors, import sort, and code style violations
run: |
ruff check .
- name: Check formatting
run: |
ruff format --check --diff .
- name: Add Problem Matcher
run: echo "::add-matcher::.github/mypy-problem-matcher.json"
- name: Check static typing
run: |
mypy -p posthog | mypy-baseline filter
- name: Check if "schema.py" is up to date
run: |
npm run schema:build:python && git diff --exit-code
- name: Check if ANTLR definitions are up to date
run: |
cd ..
sudo apt-get install default-jre
mkdir antlr
cd antlr
curl -o antlr.jar https://www.antlr.org/download/antlr-$ANTLR_VERSION-complete.jar
export PWD=`pwd`
echo '#!/bin/bash' > antlr
echo "java -jar $PWD/antlr.jar \$*" >> antlr
chmod +x antlr
export CLASSPATH=".:$PWD/antlr.jar:$CLASSPATH"
export PATH="$PWD:$PATH"
cd ../posthog
antlr | grep "Version"
npm run grammar:build && git diff --exit-code
env:
# Installing a version of ANTLR compatible with what's in Homebrew as of October 2023 (version 4.13),
# as apt-get is quite out of date. The same version must be set in hogql_parser/pyproject.toml
ANTLR_VERSION: '4.13.1'
check-migrations:
needs: changes
if: needs.changes.outputs.backend == 'true'
timeout-minutes: 10
name: Validate Django and CH migrations
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Stop/Start stack with Docker Compose
run: |
docker compose -f docker-compose.dev.yml down
docker compose -f docker-compose.dev.yml up -d
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.10.10
cache: 'pip'
cache-dependency-path: '**/requirements*.txt'
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}
# uv is a fast pip alternative: https://github.com/astral-sh/uv/
- run: pip install uv
- name: Install SAML (python3-saml) dependencies
run: |
sudo apt-get update
sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl
# First running migrations from master, to simulate the real-world scenario
- name: Checkout master
uses: actions/checkout@v3
with:
ref: master
- name: Install python dependencies for master
run: |
uv pip install --system -r requirements.txt -r requirements-dev.txt
- name: Run migrations up to master
run: |
python manage.py migrate
# Now we can consider this PR's migrations
- name: Checkout this PR
uses: actions/checkout@v3
- name: Install python dependencies for this PR
run: |
uv pip install --system -r requirements.txt -r requirements-dev.txt
- name: Run migrations for this PR
run: |
python manage.py migrate
- name: Check migrations
run: |
python manage.py makemigrations --check --dry-run
git fetch origin master
# `git diff --name-only` returns a list of files that were changed - added OR deleted OR modified
# With `--name-status` we get the same, but including a column for status, respectively: A, D, M
# In this check we exclusively care about files that were
# added (A) in posthog/migrations/. We also want to ignore
# initial migrations (0001_*) as these are guaranteed to be
# run on initial setup where there is no data.
git diff --name-status origin/master..HEAD | grep "A\sposthog/migrations/" | awk '{print $2}' | grep -v migrations/0001_ | python manage.py test_migrations_are_safe
- name: Check CH migrations
run: |
# Same as above, except now for CH looking at files that were added in posthog/clickhouse/migrations/
git diff --name-status origin/master..HEAD | grep "A\sposthog/clickhouse/migrations/" | awk '{print $2}' | python manage.py test_ch_migrations_are_safe
django:
needs: changes
timeout-minutes: 30
name: Django tests – ${{ matrix.segment }} (persons-on-events ${{ matrix.person-on-events && 'on' || 'off' }}), Py ${{ matrix.python-version }}, ${{ matrix.clickhouse-server-image }} (${{matrix.group}}/${{ matrix.concurrency }})
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ['3.10.10']
clickhouse-server-image: ['clickhouse/clickhouse-server:23.12.5.81-alpine']
segment: ['Core']
person-on-events: [false, true]
# :NOTE: Keep concurrency and groups in sync
concurrency: [10]
group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
include:
- segment: 'Temporal'
person-on-events: false
clickhouse-server-image: 'clickhouse/clickhouse-server:23.12.5.81-alpine'
python-version: '3.10.10'
concurrency: 1
group: 1
steps:
# The first step is the only one that should run if `needs.changes.outputs.backend == 'false'`.
# All the other ones should rely on `needs.changes.outputs.backend` directly or indirectly, so that they're
# effectively skipped if backend code is unchanged. See https://github.com/PostHog/posthog/pull/15174.
- uses: actions/checkout@v3
with:
fetch-depth: 1
repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.pull_request.head.ref }}
# Use PostHog Bot token when not on forks to enable proper snapshot updating
token: ${{ github.event.pull_request.head.repo.full_name == github.repository && secrets.POSTHOG_BOT_GITHUB_TOKEN || github.token }}
- uses: ./.github/actions/run-backend-tests
if: needs.changes.outputs.backend == 'true'
with:
segment: ${{ matrix.segment }}
person-on-events: ${{ matrix.person-on-events }}
python-version: ${{ matrix.python-version }}
clickhouse-server-image: ${{ matrix.clickhouse-server-image }}
concurrency: ${{ matrix.concurrency }}
group: ${{ matrix.group }}
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}
- uses: EndBug/add-and-commit@v9
# Skip on forks
# Also skip for persons-on-events runs, as we want to ignore snapshots diverging there
if: ${{ github.repository == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }}
with:
add: '["ee", "./**/*.ambr", "posthog/queries/", "posthog/migrations", "posthog/tasks", "posthog/hogql/"]'
message: 'Update query snapshots'
pull: --rebase --autostash # Make sure we're up-to-date with other segments' updates
default_author: github_actions
github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}
- name: Check if any snapshot changes were left uncomitted
id: changed-files
if: ${{ github.repository == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }}
run: |
if [[ -z $(git status -s | grep -v ".test_durations" | tr -d "\n") ]]
then
echo 'files_found=false' >> $GITHUB_OUTPUT
else
echo 'diff=$(git status --porcelain)' >> $GITHUB_OUTPUT
echo 'files_found=true' >> $GITHUB_OUTPUT
fi
- name: Fail CI if some snapshots have been updated but not committed
if: steps.changed-files.outputs.files_found == 'true' && steps.add-and-commit.outcome == 'success'
run: |
echo "${{ steps.changed-files.outputs.diff }}"
exit 1
- name: Archive email renders
uses: actions/upload-artifact@v3
if: needs.changes.outputs.backend == 'true' && matrix.segment == 'Core' && matrix.person-on-events == false
with:
name: email_renders
path: posthog/tasks/test/__emails__
retention-days: 5
async-migrations:
name: Async migrations tests - ${{ matrix.clickhouse-server-image }}
needs: changes
strategy:
fail-fast: false
matrix:
clickhouse-server-image: ['clickhouse/clickhouse-server:23.12.5.81-alpine']
if: needs.changes.outputs.backend == 'true'
runs-on: ubuntu-latest
steps:
- name: 'Checkout repo'
uses: actions/checkout@v3
with:
fetch-depth: 1
- name: Start stack with Docker Compose
run: |
export CLICKHOUSE_SERVER_IMAGE_VERSION=${{ matrix.clickhouse-server-image }}
docker compose -f docker-compose.dev.yml down
docker compose -f docker-compose.dev.yml up -d
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.10.10
cache: 'pip'
cache-dependency-path: '**/requirements*.txt'
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}
# uv is a fast pip alternative: https://github.com/astral-sh/uv/
- run: pip install uv
- name: Install SAML (python3-saml) dependencies
run: |
sudo apt-get update
sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl
- name: Install python dependencies
shell: bash
run: |
uv pip install --system -r requirements.txt -r requirements-dev.txt
- name: Add kafka host to /etc/hosts for kafka connectivity
run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts
- name: Set up needed files
run: |
mkdir -p frontend/dist
touch frontend/dist/index.html
touch frontend/dist/layout.html
touch frontend/dist/exporter.html
- name: Wait for Clickhouse & Kafka
run: bin/check_kafka_clickhouse_up
- name: Run async migrations tests
run: |
pytest -m "async_migrations"
calculate-running-time:
name: Calculate running time
needs: [django, async-migrations]
runs-on: ubuntu-latest
if: needs.changes.outputs.backend == 'true'
steps:
- name: Calculate running time
run: |
echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
run_id=${GITHUB_RUN_ID}
repo=${GITHUB_REPOSITORY}
run_info=$(gh api repos/${repo}/actions/runs/${run_id})
echo run_info: ${run_info}
# name is the name of the workflow file
# run_started_at is the start time of the workflow
# we want to get the number of seconds between the start time and now
name=$(echo ${run_info} | jq -r '.name')
run_url=$(echo ${run_info} | jq -r '.url')
run_started_at=$(echo ${run_info} | jq -r '.run_started_at')
run_attempt=$(echo ${run_info} | jq -r '.run_attempt')
start_seconds=$(date -d "${run_started_at}" +%s)
now_seconds=$(date +%s)
duration=$((now_seconds-start_seconds))
echo running_time_duration_seconds=${duration} >> $GITHUB_ENV
echo running_time_run_url=${run_url} >> $GITHUB_ENV
echo running_time_run_attempt=${run_attempt} >> $GITHUB_ENV
echo running_time_run_id=${run_id} >> $GITHUB_ENV
echo running_time_run_started_at=${run_started_at} >> $GITHUB_ENV
- name: Capture running time to PostHog
if: github.repository == 'PostHog/posthog'
uses: PostHog/posthog-github-action@v0.1
with:
posthog-token: ${{secrets.POSTHOG_API_TOKEN}}
event: 'posthog-ci-running-time'
properties: '{"duration_seconds": ${{ env.running_time_duration_seconds }}, "run_url": "${{ env.running_time_run_url }}", "run_attempt": "${{ env.running_time_run_attempt }}", "run_id": "${{ env.running_time_run_id }}", "run_started_at": "${{ env.running_time_run_started_at }}"}'