Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Possible Solutions to Issues 26216, 25677, and 25589 #26276

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 30 additions & 0 deletions numpy/lib/_function_base_impl.py
Expand Up @@ -3866,9 +3866,39 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
np.float64(3.5)
>>> assert not np.all(a==b)

"""

"""
Addressing issue 26216
Calculate the median date by sorting the list of np.datetime64 values and selecting either
the middle value if the list has an odd length or the middle two values if the list has an even length.

If the list is odd, returns the middle date, and if the list is even, it calculates the average distance between
the middle two dates, adding this average distance to the earlier middle date.
"""

# handle edge case where a is a Python list
if isinstance(a, list):
a = np.array(a)

if a.size > 0 and len(a.shape) > 0 and all(isinstance(i, np.datetime64) for i in a):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't take into account any of the keepdims, axis, out or overwrite_input arguments.

a = sorted(a)
if len(a) % 2 != 0:
return a[len(a)//2]
else:
second = a[len(a)//2]
first = a[len(a)//2 - 1]
days_between = (second - first)//2
median_date = np.datetime64(first + days_between)
return median_date
else:
return _ureduce(a, func=_median, keepdims=keepdims, axis=axis, out=out,
overwrite_input=overwrite_input)

"""
return _ureduce(a, func=_median, keepdims=keepdims, axis=axis, out=out,
overwrite_input=overwrite_input)
"""


def _median(a, axis=None, out=None, overwrite_input=False):
Expand Down
47 changes: 45 additions & 2 deletions numpy/ma/core.py
Expand Up @@ -165,6 +165,7 @@ class MaskError(MAError):


# b: boolean - c: complex - f: floats - i: integer - O: object - S: string
"""
default_filler = {'b': True,
'c': 1.e20 + 0.0j,
'f': 1.e20,
Expand All @@ -175,6 +176,31 @@ class MaskError(MAError):
'V': b'???',
'U': 'N/A'
}
"""

"""
Addressing issue 25677
Added different custom fill values for different ints and floats.
"""
default_filler = {'b': True,
'c': 1.e20 + 0.0j,
"float16": 65504,
"float32": pow(2, 31) - 1,
"float64": pow(2, 63) - 1,
"int8": 127,
"uint8": 255,
'int16': 32767,
"uint16": 65535,
"int32": 2147483647,
"uint32": 4294967295,
"int64": 9223372036854775807,
"uint64": pow(2, 64) - 1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all of the integer dtypes that can represent 999999 should retain that value. Otherwise, we'd have to change all of those tests that are now failing. Similarly, the floating point dtypes should retain the old value.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, thanks for letting me know. I reverted the int types back as follows:

default_filler = {'b': True, 'c': 1.e20 + 0.0j, "float16": 65504, "float32": 1.e20, "float64": 1.e20, "i": 999999, 'O': '?', 'S': b'N/A', 'V': b'???', 'U': 'N/A' }

However, float16 remains 65504 to avoid the inf value per the original issue (#25677).

Does this make sense?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#25677 is still unresolved for the smaller integer types. Write out the unit tests that test for the behavior that you want. Demonstrate (to yourself) that they fail without your changes, then add your changes until the tests pass.

"i": 999999,
'O': '?',
'S': b'N/A',
'V': b'???',
'U': 'N/A'
}

# Add datetime64 and timedelta64 types
for v in ["Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps",
Expand Down Expand Up @@ -298,7 +324,18 @@ def _scalar_fill_value(dtype):
if dtype.kind in 'Mm':
return default_filler.get(dtype.str[1:], '?')
else:
return default_filler.get(dtype.kind, '?')
"""
Addressing issue 25677
Using str(dtype) acts as the key in the default_filler dictionary to retrieve the
custom value.

If this key does not exist in default_filler, then dtype.kind is used as the key instead.
"""
if default_filler.get(str(dtype)) is not None:
return default_filler.get(str(dtype), '?')
else:
return default_filler.get(dtype.kind, '?')
# return default_filler.get(dtype.kind, '?')

dtype = _get_dtype_of(obj)
return _recursive_fill_value(dtype, _scalar_fill_value)
Expand Down Expand Up @@ -7085,7 +7122,12 @@ def power(a, b, third=None):
# Get the masks
ma = getmask(a)
mb = getmask(b)
m = mask_or(ma, mb)
# m = mask_or(ma, mb)
"""
Addressing issue 25589
Added shrink = False on line 7130 to preserve the existing mask.
20revsined marked this conversation as resolved.
Show resolved Hide resolved
"""
m = mask_or(ma, mb, shrink=False)
# Get the rawdata
fa = getdata(a)
fb = getdata(b)
Expand All @@ -7112,6 +7154,7 @@ def power(a, b, third=None):
elif result._mask is nomask:
result._mask = invalid
result._data[invalid] = result.fill_value
# result.mask = a.mask
return result

argmin = _frommethod('argmin')
Expand Down