Test with PySpark 3.2 (#2203)
Adds a test matrix entry to test against PySpark `3.2`.
Also upgrades the PySpark `3.1` test entry to `3.1.2`.
ueshin committed Oct 18, 2021
1 parent f44c050 commit a7e7bc7
Showing 4 changed files with 18 additions and 13 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/master.yml
@@ -28,10 +28,15 @@ jobs:
             numpy-version: 1.19.5
             default-index-type: 'distributed-sequence'
           - python-version: 3.9
-            spark-version: 3.1.1
+            spark-version: 3.1.2
             pandas-version: 1.2.5
             pyarrow-version: 3.0.0
             numpy-version: 1.20.3
+          - python-version: 3.9
+            spark-version: 3.2.0
+            pandas-version: 1.2.5
+            pyarrow-version: 4.0.1
+            numpy-version: 1.21.2
             default-index-type: 'distributed-sequence'
     env:
       PYTHON_VERSION: ${{ matrix.python-version }}
14 changes: 7 additions & 7 deletions databricks/koalas/groupby.py
@@ -1073,7 +1073,7 @@ def apply(self, func, *args, **kwargs) -> Union[DataFrame, Series]:
>>> def plus_max(x) -> ks.Series[np.int]:
... return x + x.max()
- >>> df.B.groupby(df.A).apply(plus_max).sort_index()
+ >>> df.B.groupby(df.A).apply(plus_max).sort_index() # doctest: +SKIP
0 6
1 3
2 4
@@ -1091,7 +1091,7 @@ def apply(self, func, *args, **kwargs) -> Union[DataFrame, Series]:
>>> def plus_length(x) -> np.int:
... return len(x)
- >>> df.B.groupby(df.A).apply(plus_length).sort_index()
+ >>> df.B.groupby(df.A).apply(plus_length).sort_index() # doctest: +SKIP
0 1
1 2
Name: B, dtype: int64
@@ -1100,7 +1100,7 @@ def apply(self, func, *args, **kwargs) -> Union[DataFrame, Series]:
>>> def calculation(x, y, z) -> np.int:
... return len(x) + y * z
- >>> df.B.groupby(df.A).apply(calculation, 5, z=10).sort_index()
+ >>> df.B.groupby(df.A).apply(calculation, 5, z=10).sort_index() # doctest: +SKIP
0 51
1 52
Name: B, dtype: int64
@@ -1903,12 +1903,12 @@ def tail(self, n=5) -> Union[DataFrame, Series]:
... 'b': [2, 3, 1, 4, 6, 9, 8, 10, 7, 5],
... 'c': [3, 5, 2, 5, 1, 2, 6, 4, 3, 6]},
... columns=['a', 'b', 'c'],
- ... index=[7, 2, 4, 1, 3, 4, 9, 10, 5, 6])
+ ... index=[7, 2, 3, 1, 3, 4, 9, 10, 5, 6])
>>> df
a b c
7 1 2 3
2 1 3 5
- 4 1 1 2
+ 3 1 1 2
1 1 4 5
3 2 6 1
4 2 9 2
>>> df.groupby('a').tail(2).sort_index()
a b c
1 1 4 5
+ 3 1 1 2
4 2 9 2
- 4 1 1 2
5 3 7 3
6 3 5 6
9 2 8 6
>>> df.groupby('a')['b'].tail(2).sort_index()
1 4
+ 3 1
4 9
- 4 1
5 7
6 5
9 8
6 changes: 3 additions & 3 deletions databricks/koalas/indexes/base.py
@@ -1381,12 +1381,12 @@ def symmetric_difference(self, other, result_name=None, sort=None) -> "Index":
>>> s1 = ks.Series([1, 2, 3, 4], index=[1, 2, 3, 4])
>>> s2 = ks.Series([1, 2, 3, 4], index=[2, 3, 4, 5])
- >>> s1.index.symmetric_difference(s2.index)
+ >>> s1.index.symmetric_difference(s2.index) # doctest: +SKIP
Int64Index([5, 1], dtype='int64')
You can set name of result Index.
- >>> s1.index.symmetric_difference(s2.index, result_name='koalas')
+ >>> s1.index.symmetric_difference(s2.index, result_name='koalas') # doctest: +SKIP
Int64Index([5, 1], dtype='int64', name='koalas')
You can set sort to `True`, if you want to sort the resulting index.
@@ -1396,7 +1396,7 @@ def symmetric_difference(self, other, result_name=None, sort=None) -> "Index":
You can also use the ``^`` operator:
- >>> s1.index ^ s2.index
+ >>> s1.index ^ s2.index # doctest: +SKIP
Int64Index([5, 1], dtype='int64')
"""
if type(self) != type(other):
4 changes: 2 additions & 2 deletions dev/pytest
@@ -41,8 +41,8 @@ fi

# Runs both doctests and unit tests by default, otherwise hands arguments over to pytest.
if [ "$#" = 0 ]; then
if [[ "$SPARK_VERSION" == 2.3* ]] || [[ "$SPARK_VERSION" == 2.4.1* ]] || [[ "$SPARK_VERSION" == 2.4.2* ]]; then
# Delta requires Spark 2.4.2+. We skip the related doctests.
if [[ "$SPARK_VERSION" == 2.3* ]] || [[ "$SPARK_VERSION" == 2.4.1* ]] || [[ "$SPARK_VERSION" == 2.4.2* ]] || [[ "$SPARK_VERSION" == 3.2.* ]]; then
# Delta requires Spark 2.4.2+, and doesn't support Spark 3.2+ yet. We skip the related doctests.
if [[ "$SPARK_VERSION" == 2.3* ]]; then
$PYTHON_EXECUTABLE -m pytest --cov=databricks --cov-report xml:"$FWDIR/coverage.xml" -k "not (melt or to_delta or read_delta or to_clipboard)" --verbose --showlocals --color=yes --doctest-modules databricks "${logopts[@]}"
else
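For context, a rough sketch of exercising the new PySpark 3.2 combination locally with the dev/pytest script above, assuming the package versions from the new workflow matrix entry (which pairs them with Python 3.9); how CI installs these packages and derives SPARK_VERSION is not shown in this diff.

    # Hypothetical local setup mirroring the new PySpark 3.2 matrix entry; versions taken from master.yml above.
    pip install 'pyspark==3.2.0' 'pandas==1.2.5' 'pyarrow==4.0.1' 'numpy==1.21.2'
    # With no arguments the script runs both doctests and unit tests; any arguments are handed over to pytest.
    ./dev/pytest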
