
Commit

Merge branch 'main' into pandas-devGH-15354-phased
rtpsw committed Feb 18, 2022
2 parents 527e0e0 + 1bd193e commit 9c225c0
Showing 408 changed files with 9,956 additions and 5,253 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/code-checks.yml
@@ -156,3 +156,27 @@ jobs:
name: Benchmarks log
path: asv_bench/benchmarks.log
if: failure()

build_docker_dev_environment:
name: Build Docker Dev Environment
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}

concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment
cancel-in-progress: true

steps:
- name: Clean up dangling images
run: docker image prune -f

- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0

- name: Build image
run: docker build --pull --no-cache --tag pandas-dev-env .
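The new job simply checks out the repository and builds the development image from the in-tree Dockerfile. As a rough local equivalent (a sketch, not part of this diff; the interactive `docker run` invocation and shell choice are assumptions), the same image can be built and entered with:

```bash
# Build the dev image the same way the workflow step above does.
docker build --pull --no-cache --tag pandas-dev-env .

# Assumed usage: open an interactive shell inside the freshly built image.
docker run -it --rm pandas-dev-env /bin/bash
```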
3 changes: 2 additions & 1 deletion .github/workflows/datamanger.yml
@@ -20,6 +20,7 @@ jobs:
data_manager:
name: Test experimental data manager
runs-on: ubuntu-latest
timeout-minutes: 120
services:
moto:
image: motoserver/moto
@@ -45,7 +46,7 @@ jobs:
- name: Run tests
env:
PANDAS_DATA_MANAGER: array
PATTERN: "not network and not clipboard"
PATTERN: "not network and not clipboard and not single_cpu"
PYTEST_WORKERS: "auto"
PYTEST_TARGET: pandas
run: |
67 changes: 41 additions & 26 deletions .github/workflows/posix.yml
@@ -13,7 +13,6 @@ on:
- "doc/**"

env:
PYTEST_WORKERS: "auto"
PANDAS_CI: 1

jobs:
@@ -22,35 +21,51 @@ jobs:
defaults:
run:
shell: bash -l {0}
timeout-minutes: 120
strategy:
matrix:
settings: [
[actions-38-downstream_compat.yaml, "not slow and not network and not clipboard", "", "", "", "", ""],
[actions-38-minimum_versions.yaml, "not clipboard", "", "", "", "", ""],
[actions-38.yaml, "not slow and not network", "language-pack-it xsel", "it_IT.utf8", "it_IT.utf8", "", ""],
[actions-38.yaml, "not slow and not network", "language-pack-zh-hans xsel", "zh_CN.utf8", "zh_CN.utf8", "", ""],
[actions-38.yaml, "not clipboard", "", "", "", "", ""],
[actions-pypy-38.yaml, "not slow and not clipboard", "", "", "", "", "--max-worker-restart 0"],
[actions-39.yaml, "not clipboard", "", "", "", "", ""],
[actions-310-numpydev.yaml, "not slow and not network", "xsel", "", "", "deprecate", "-W error"],
[actions-310.yaml, "not clipboard", "", "", "", "", ""],
]
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
pattern: ["not single_cpu", "single_cpu"]
include:
- env_file: actions-38-downstream_compat.yaml
pattern: "not slow and not network and not single_cpu"
pytest_target: "pandas/tests/test_downstream.py"
- env_file: actions-38-minimum_versions.yaml
pattern: "not slow and not network and not single_cpu"
- env_file: actions-38.yaml
pattern: "not slow and not network and not single_cpu"
extra_apt: "language-pack-it"
lang: "it_IT.utf8"
lc_all: "it_IT.utf8"
- env_file: actions-38.yaml
pattern: "not slow and not network and not single_cpu"
extra_apt: "language-pack-zh-hans"
lang: "zh_CN.utf8"
lc_all: "zh_CN.utf8"
- env_file: actions-pypy-38.yaml
pattern: "not slow and not network and not single_cpu"
test_args: "--max-worker-restart 0"
- env_file: actions-310-numpydev.yaml
pattern: "not slow and not network and not single_cpu"
pandas_testing_mode: "deprecate"
test_args: "-W error"
fail-fast: false
env:
ENV_FILE: ci/deps/${{ matrix.settings[0] }}
PATTERN: ${{ matrix.settings[1] }}
EXTRA_APT: ${{ matrix.settings[2] }}
LANG: ${{ matrix.settings[3] }}
LC_ALL: ${{ matrix.settings[4] }}
PANDAS_TESTING_MODE: ${{ matrix.settings[5] }}
TEST_ARGS: ${{ matrix.settings[6] }}
PYTEST_TARGET: pandas
IS_PYPY: ${{ contains(matrix.settings[0], 'pypy') }}
ENV_FILE: ci/deps/${{ matrix.env_file }}
PATTERN: ${{ matrix.pattern }}
EXTRA_APT: ${{ matrix.extra_apt || '' }}
LANG: ${{ matrix.lang || '' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_TESTING_MODE: ${{ matrix.pandas_testing_mode || '' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
# TODO: re-enable coverage on pypy, its slow
COVERAGE: ${{ !contains(matrix.settings[0], 'pypy') }}
COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }}
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.settings[0] }}-${{ matrix.settings[1] }}-${{ matrix.settings[2] }}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
cancel-in-progress: true

services:
@@ -105,7 +120,8 @@ jobs:
hashFiles('${{ env.ENV_FILE }}') }}

- name: Extra installs
run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 ${{ env.EXTRA_APT }}
# xsel for clipboard tests
run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }}

- uses: conda-incubator/setup-miniconda@v2
with:
@@ -127,8 +143,7 @@ jobs:
shell: bash
run: |
# TODO: re-enable cov, its slowing the tests down though
# TODO: Unpin Cython, the new Cython 0.29.26 is causing compilation errors
pip install Cython==0.29.25 numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
pip install Cython numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
if: ${{ env.IS_PYPY == 'true' }}

- name: Build Pandas
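Net effect of this rewrite: the positional `settings` arrays become named matrix keys, a `single_cpu` leg is split out, and `PYTEST_WORKERS` is now derived from the pattern instead of being a global default. A hedged sketch of roughly how these variables end up on the pytest command line (the real logic lives in pandas' CI scripts, which are not part of this diff):

```bash
# Sketch only: approximate translation of the env block above into a pytest call.
# PATTERN is a marker expression, PYTEST_WORKERS the pytest-xdist worker count
# ('auto' normally, '1' for the single_cpu leg), and TEST_ARGS carries extras
# such as "-W error" or "--max-worker-restart 0".
pytest "$PYTEST_TARGET" -m "$PATTERN" -n "$PYTEST_WORKERS" $TEST_ARGS
```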
2 changes: 1 addition & 1 deletion .github/workflows/python-dev.yml
@@ -23,7 +23,7 @@ on:
env:
PYTEST_WORKERS: "auto"
PANDAS_CI: 1
PATTERN: "not slow and not network and not clipboard"
PATTERN: "not slow and not network and not clipboard and not single_cpu"
COVERAGE: true
PYTEST_TARGET: pandas

6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ ci:
autofix_prs: false
repos:
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.0
rev: v0.3.1
hooks:
- id: absolufy-imports
files: ^pandas/
@@ -16,7 +16,7 @@ repos:
pass_filenames: true
require_serial: false
- repo: https://github.com/python/black
rev: 21.12b0
rev: 22.1.0
hooks:
- id: black
- repo: https://github.com/codespell-project/codespell
@@ -50,7 +50,7 @@ repos:
- flake8==4.0.1
- flake8-comprehensions==3.7.0
- flake8-bugbear==21.3.2
- pandas-dev-flaker==0.2.0
- pandas-dev-flaker==0.4.0
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
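These hooks are straight version bumps (absolufy-imports 0.3.1, black 22.1.0, pandas-dev-flaker 0.4.0). To pick them up locally, the usual pre-commit commands apply (standard pre-commit CLI, not part of this diff):

```bash
# Re-run every hook against the whole tree; pre-commit rebuilds the hook
# environments automatically when a rev changes.
pre-commit install
pre-commit run --all-files
```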
4 changes: 2 additions & 2 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM quay.io/condaforge/miniforge3
FROM quay.io/condaforge/miniforge3:4.11.0-0

# if you forked pandas, you can pass in your own GitHub username to use your fork
# i.e. gh_username=myname
@@ -45,4 +45,4 @@ RUN . /opt/conda/etc/profile.d/conda.sh \
&& cd "$pandas_home" \
&& export \
&& python setup.py build_ext -j 4 \
&& python -m pip install -e .
&& python -m pip install --no-build-isolation -e .
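Two small hardening changes here: the miniforge3 base image is pinned to the 4.11.0-0 tag, and the editable install passes `--no-build-isolation` so pip reuses the build dependencies already present in the conda environment instead of creating an isolated build environment. The equivalent in-container commands, taken from the Dockerfile above, are:

```bash
# Build the C extensions in parallel, then install pandas in editable mode
# without a separate isolated build environment.
python setup.py build_ext -j 4
python -m pip install --no-build-isolation -e .
```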
2 changes: 1 addition & 1 deletion README.md
@@ -136,7 +136,7 @@ or alternatively
python setup.py develop
```

See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source).
See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html#installing-from-source).

## License
[BSD 3](LICENSE)
10 changes: 5 additions & 5 deletions asv_bench/benchmarks/algorithms.py
@@ -34,7 +34,7 @@ class Factorize:
param_names = ["unique", "sort", "dtype"]

def setup(self, unique, sort, dtype):
N = 10 ** 5
N = 10**5
string_index = tm.makeStringIndex(N)
string_arrow = None
if dtype == "string[pyarrow]":
@@ -74,7 +74,7 @@ class Duplicated:
param_names = ["unique", "keep", "dtype"]

def setup(self, unique, keep, dtype):
N = 10 ** 5
N = 10**5
data = {
"int": pd.Index(np.arange(N), dtype="int64"),
"uint": pd.Index(np.arange(N), dtype="uint64"),
@@ -97,7 +97,7 @@ def time_duplicated(self, unique, keep, dtype):

class Hashing:
def setup_cache(self):
N = 10 ** 5
N = 10**5

df = pd.DataFrame(
{
@@ -145,7 +145,7 @@ class Quantile:
param_names = ["quantile", "interpolation", "dtype"]

def setup(self, quantile, interpolation, dtype):
N = 10 ** 5
N = 10**5
data = {
"int": np.arange(N),
"uint": np.arange(N).astype(np.uint64),
@@ -158,7 +158,7 @@ def time_quantile(self, quantile, interpolation, dtype):


class SortIntegerArray:
params = [10 ** 3, 10 ** 5]
params = [10**3, 10**5]

def setup(self, N):
data = np.arange(N, dtype=float)
20 changes: 10 additions & 10 deletions asv_bench/benchmarks/algos/isin.py
@@ -49,7 +49,7 @@ def setup(self, dtype):

elif dtype in ["category[object]", "category[int]"]:
# Note: sizes are different in this case than others
n = 5 * 10 ** 5
n = 5 * 10**5
sample_size = 100

arr = list(np.random.randint(0, n // 10, size=n))
@@ -174,7 +174,7 @@ class IsinWithArange:

def setup(self, dtype, M, offset_factor):
offset = int(M * offset_factor)
tmp = Series(np.random.randint(offset, M + offset, 10 ** 6))
tmp = Series(np.random.randint(offset, M + offset, 10**6))
self.series = tmp.astype(dtype)
self.values = np.arange(M).astype(dtype)

@@ -191,8 +191,8 @@ class IsInFloat64:
param_names = ["dtype", "title"]

def setup(self, dtype, title):
N_many = 10 ** 5
N_few = 10 ** 6
N_many = 10**5
N_few = 10**6
self.series = Series([1, 2], dtype=dtype)

if title == "many_different_values":
@@ -240,10 +240,10 @@ class IsInForObjects:
param_names = ["series_type", "vals_type"]

def setup(self, series_type, vals_type):
N_many = 10 ** 5
N_many = 10**5

if series_type == "nans":
ser_vals = np.full(10 ** 4, np.nan)
ser_vals = np.full(10**4, np.nan)
elif series_type == "short":
ser_vals = np.arange(2)
elif series_type == "long":
@@ -254,7 +254,7 @@ def setup(self, series_type, vals_type):
self.series = Series(ser_vals).astype(object)

if vals_type == "nans":
values = np.full(10 ** 4, np.nan)
values = np.full(10**4, np.nan)
elif vals_type == "short":
values = np.arange(2)
elif vals_type == "long":
@@ -277,7 +277,7 @@ class IsInLongSeriesLookUpDominates:
param_names = ["dtype", "MaxNumber", "series_type"]

def setup(self, dtype, MaxNumber, series_type):
N = 10 ** 7
N = 10**7

if series_type == "random_hits":
array = np.random.randint(0, MaxNumber, N)
@@ -304,15 +304,15 @@ class IsInLongSeriesValuesDominate:
param_names = ["dtype", "series_type"]

def setup(self, dtype, series_type):
N = 10 ** 7
N = 10**7

if series_type == "random":
vals = np.random.randint(0, 10 * N, N)
if series_type == "monotone":
vals = np.arange(N)

self.values = vals.astype(dtype.lower())
M = 10 ** 6 + 1
M = 10**6 + 1
self.series = Series(np.arange(M)).astype(dtype)

def time_isin(self, dtypes, series_type):
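The benchmark edits in algorithms.py and algos/isin.py are purely mechanical: black 22.1.0 drops the spaces around the `**` operator, so `10 ** 5` becomes `10**5` with no behaviour change. One way to sanity-check a touched suite locally (an assumed asv invocation; the `upstream/main` remote name and the benchmark regex are illustrative, not taken from this diff):

```bash
# Compare the reformatted isin benchmarks against main; -b selects benchmarks
# matching a regex, -f sets the regression factor threshold.
asv continuous -f 1.1 upstream/main HEAD -b isin
```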
