# Current File: /usr/local/lib64/python3.6/site-packages/pandas/core/ops/__init__.py
"""
Arithmetic operations for PandasObjects
This is not a public API.
"""
import operator
from typing import TYPE_CHECKING, Optional, Set, Type
import numpy as np
from pandas._libs import lib
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
from pandas._typing import Level
from pandas.util._decorators import Appender
from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
from pandas.core import algorithms
from pandas.core.construction import extract_array
from pandas.core.ops.array_ops import (
arithmetic_op,
comparison_op,
get_array_op,
logical_op,
)
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.docstrings import (
_arith_doc_FRAME,
_flex_comp_doc_FRAME,
_make_flex_doc,
_op_descriptions,
)
from pandas.core.ops.invalid import invalid_comparison # noqa:F401
from pandas.core.ops.mask_ops import kleene_and, kleene_or, kleene_xor # noqa: F401
from pandas.core.ops.methods import ( # noqa:F401
add_flex_arithmetic_methods,
add_special_arithmetic_methods,
)
from pandas.core.ops.roperator import ( # noqa:F401
radd,
rand_,
rdiv,
rdivmod,
rfloordiv,
rmod,
rmul,
ror_,
rpow,
rsub,
rtruediv,
rxor,
)
if TYPE_CHECKING:
from pandas import DataFrame, Series # noqa:F401
# -----------------------------------------------------------------------------
# constants
# Names of the binary arithmetic ops: every forward op name appears together
# with its reflected ("r"-prefixed) counterpart.
ARITHMETIC_BINOPS: Set[str] = {
    prefix + base
    for base in (
        "add",
        "sub",
        "mul",
        "pow",
        "mod",
        "floordiv",
        "truediv",
        "divmod",
    )
    for prefix in ("", "r")
}
# Names of the binary comparison ops.
COMPARISON_BINOPS: Set[str] = {"eq", "ne", "lt", "le", "gt", "ge"}
# -----------------------------------------------------------------------------
# Ops Wrapping Utilities
def get_op_result_name(left, right):
    """
    Determine the name that the result of a binary operation should carry.

    Parameters
    ----------
    left : {Series, Index}
    right : object

    Returns
    -------
    name : object
        Usually a string
    """
    if not isinstance(right, (ABCSeries, ABCIndexClass)):
        # Only Series/Index operands take part in name matching; for any
        # other right operand the left operand's name is used as-is.
        return left.name
    return _maybe_match_name(left, right)
def _maybe_match_name(a, b):
"""
Try to find a name to attach to the result of an operation between
a and b. If only one of these has a `name` attribute, return that
name. Otherwise return a consensus name if they match of None if
they have different names.
Parameters
----------
a : object
b : object
Returns
-------
name : str or None
See Also
--------
pandas.core.common.consensus_name_attr
"""
a_has = hasattr(a, "name")
b_has = hasattr(b, "name")
if a_has and b_has:
if a.name == b.name:
return a.name
else:
# TODO: what if they both have np.nan for their names?
return None
elif a_has:
return a.name
elif b_has:
return b.name
return None
# -----------------------------------------------------------------------------
def _get_frame_op_default_axis(name: str) -> Optional[str]:
"""
Only DataFrame cares about default_axis, specifically:
special methods have default_axis=None and flex methods
have default_axis='columns'.
Parameters
----------
name : str
Returns
-------
default_axis: str or None
"""
if name.replace("__r", "__") in ["__and__", "__or__", "__xor__"]:
# bool methods
return "columns"
elif name.startswith("__"):
# __add__, __mul__, ...
return None
else:
# add, mul, ...
return "columns"
def _get_op_name(op, special: bool) -> str:
"""
Find the name to attach to this method according to conventions
for special and non-special methods.
Parameters
----------
op : binary operator
special : bool
Returns
-------
op_name : str
"""
opname = op.__name__.strip("_")
if special:
opname = f"__{opname}__"
return opname
# -----------------------------------------------------------------------------
# Masking NA values and fallbacks for operations numpy does not support
def fill_binop(left, right, fill_value):
    """
    Substitute `fill_value` for null entries of `left` and `right`, but only
    at positions where exactly one of the two operands is null.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object
        If None, both operands are returned untouched.

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    An operand is copied before filling whenever fill_value is not None
    and that operand contains any null entry.
    """
    if fill_value is None:
        return left, right

    left_na = isna(left)
    right_na = isna(right)

    # null on one side but not on both
    exactly_one_na = left_na ^ right_na

    if left_na.any():
        # copy only when there is something null on this side
        left = left.copy()
        left[left_na & exactly_one_na] = fill_value

    if right_na.any():
        # copy only when there is something null on this side
        right = right.copy()
        right[right_na & exactly_one_na] = fill_value

    return left, right
# -----------------------------------------------------------------------------
# Dispatch logic
def dispatch_to_series(left, right, func, axis: Optional[int] = None):
    """
    Evaluate the frame operation func(left, right), either through the
    frame's block manager or column-by-column, depending on the type
    of `right`.

    Parameters
    ----------
    left : DataFrame
    right : scalar, Series, or DataFrame
    func : arithmetic or comparison operator
    axis : {None, 0, 1}

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If `right` is list-like but neither a DataFrame nor a Series.
    """
    # The array-level op that is applied to each column/block's values.
    array_op = get_array_op(func)
    right = lib.item_from_zerodim(right)
    frame_cls = type(left)

    if not is_list_like(right):
        # i.e. scalar, faster than checking np.ndim(right) == 0
        return frame_cls(left._mgr.apply(array_op, right=right))

    if isinstance(right, ABCDataFrame):
        assert left.index.equals(right.index)
        assert left.columns.equals(right.columns)
        # TODO: The previous assertion `assert right._indexed_same(left)`
        #  fails in cases with empty columns reached via
        #  _frame_arith_method_with_reindex
        return frame_cls(left._mgr.operate_blockwise(right._mgr, array_op))

    if not isinstance(right, ABCSeries):
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    if axis == 1:
        # axis=1 means we want to operate row-by-row: pair each column of
        # `left` with the matching element of `right`
        assert right.index.equals(left.columns)
        right = right._values
        # maybe_align_as_frame ensures we do not have an ndarray here
        assert not isinstance(right, np.ndarray)
        arrays = [
            array_op(column, element)
            for column, element in zip(left._iter_column_arrays(), right)
        ]
    else:
        assert right.index.equals(left.index)  # Handle other cases later
        right = right._values
        arrays = [array_op(column, right) for column in left._iter_column_arrays()]

    return frame_cls._from_arrays(
        arrays, left.columns, left.index, verify_integrity=False
    )
# -----------------------------------------------------------------------------
# Series
def _align_method_SERIES(left: "Series", right, align_asobject: bool = False):
""" align lhs and rhs Series """
# ToDo: Different from _align_method_FRAME, list, tuple and ndarray
# are not coerced here
# because Series has inconsistencies described in #13637
if isinstance(right, ABCSeries):
# avoid repeated alignment
if not left.index.equals(right.index):
if align_asobject:
# to keep original value's dtype for bool ops
left = left.astype(object)
right = right.astype(object)
left, right = left.align(right, copy=False)
return left, right
def _arith_method_SERIES(cls, op, special):
    """
    Build the special (dunder) Series arithmetic method for `op`, so the
    align/extract/operate/wrap steps are written only once.
    """
    assert special  # non-special uses _flex_method_SERIES
    method_name = _get_op_name(op, special)

    @unpack_zerodim_and_defer(method_name)
    def wrapper(left, right):
        left, right = _align_method_SERIES(left, right)
        res_name = get_op_result_name(left, right)

        # Operate on the underlying arrays, then re-wrap via `left`.
        result = arithmetic_op(
            extract_array(left, extract_numpy=True),
            extract_array(right, extract_numpy=True),
            op,
        )
        return left._construct_result(result, name=res_name)

    wrapper.__name__ = method_name
    return wrapper
def _comp_method_SERIES(cls, op, special):
    """
    Build the special (dunder) Series comparison method for `op`.

    The generated method does not align its operands: comparing against a
    Series that is not identically indexed raises ValueError.
    """
    assert special  # non-special uses _flex_method_SERIES
    method_name = _get_op_name(op, special)

    @unpack_zerodim_and_defer(method_name)
    def wrapper(self, other):
        res_name = get_op_result_name(self, other)

        if isinstance(other, ABCSeries) and not self._indexed_same(other):
            raise ValueError("Can only compare identically-labeled Series objects")

        # Compare the underlying arrays, then re-wrap via `self`.
        res_values = comparison_op(
            extract_array(self, extract_numpy=True),
            extract_array(other, extract_numpy=True),
            op,
        )
        return self._construct_result(res_values, name=res_name)

    wrapper.__name__ = method_name
    return wrapper
def _bool_method_SERIES(cls, op, special):
    """
    Build the special (dunder) Series logical method for `op`.

    Operands are aligned with ``align_asobject=True`` before the logical
    op is applied to the underlying arrays.
    """
    assert special  # non-special uses _flex_method_SERIES
    method_name = _get_op_name(op, special)

    @unpack_zerodim_and_defer(method_name)
    def wrapper(self, other):
        self, other = _align_method_SERIES(self, other, align_asobject=True)
        res_name = get_op_result_name(self, other)

        # Apply the logical op to the underlying arrays, then re-wrap.
        res_values = logical_op(
            extract_array(self, extract_numpy=True),
            extract_array(other, extract_numpy=True),
            op,
        )
        return self._construct_result(res_values, name=res_name)

    wrapper.__name__ = method_name
    return wrapper
def _flex_method_SERIES(cls, op, special):
    """
    Build the flex (non-dunder) Series method for `op`, accepting `level`,
    `fill_value` and `axis` keywords.
    """
    assert not special  # "special" also means "not flex"
    name = _get_op_name(op, special)
    doc = _make_flex_doc(name, "series")

    @Appender(doc)
    def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
        # validate axis
        if axis is not None:
            self._get_axis_number(axis)

        if not isinstance(other, ABCSeries):
            if not isinstance(other, (np.ndarray, list, tuple)):
                # Anything else is handed directly to `op`, filling nulls
                # in `self` beforehand if requested.
                if fill_value is not None:
                    self = self.fillna(fill_value)
                return op(self, other)

            # Wrap the array-like in a Series that shares our index.
            if len(other) != len(self):
                raise ValueError("Lengths must be equal")
            other = self._constructor(other, self.index)

        return self._binop(other, op, level=level, fill_value=fill_value)

    flex_wrapper.__name__ = name
    return flex_wrapper
# -----------------------------------------------------------------------------
# DataFrame
def _align_method_FRAME(
    left, right, axis, flex: Optional[bool] = False, level: Level = None
):
    """
    Coerce a list, tuple or np.ndarray rhs to match the dims of lhs, then
    align DataFrame/Series operands with lhs.

    Parameters
    ----------
    left : DataFrame
    right : Any
    axis: int, str, or None
    flex: bool or None, default False
        Whether this is a flex op, in which case we reindex.
        None indicates not to check for alignment.
    level : int or level name, default None

    Returns
    -------
    left : DataFrame
    right : Any

    Raises
    ------
    ValueError
        If an array-like `right` has a length/shape that cannot be coerced,
        or if `flex` is False and `right` is a DataFrame that is not
        identically labeled.
    """

    def to_series(right):
        # Wrap a 1D array-like in a Series labeled by whichever axis of
        # `left` it is meant to line up with.
        msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"
        along_index = axis is not None and left._get_axis_name(axis) == "index"
        labels = left.index if along_index else left.columns
        if len(labels) != len(right):
            raise ValueError(msg.format(req_len=len(labels), given_len=len(right)))
        return left._constructor_sliced(right, index=labels)

    if isinstance(right, np.ndarray):
        ndim = right.ndim
        if ndim > 2:
            raise ValueError(
                "Unable to coerce to Series/DataFrame, "
                f"dimension must be <= 2: {right.shape}"
            )

        if ndim == 1:
            right = to_series(right)

        elif ndim == 2:
            n_rows, n_cols = right.shape
            if right.shape == left.shape:
                right = left._constructor(right, index=left.index, columns=left.columns)

            elif n_rows == left.shape[0] and n_cols == 1:
                # Broadcast across columns
                right = np.broadcast_to(right, left.shape)
                right = left._constructor(right, index=left.index, columns=left.columns)

            elif n_cols == left.shape[1] and n_rows == 1:
                # Broadcast along rows
                right = to_series(right[0, :])

            else:
                raise ValueError(
                    "Unable to coerce to DataFrame, shape "
                    f"must be {left.shape}: given {right.shape}"
                )

    elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)):
        # GH17901
        right = to_series(right)

    if flex is not None and isinstance(right, ABCDataFrame):
        if not left._indexed_same(right):
            if not flex:
                raise ValueError(
                    "Can only compare identically-labeled DataFrame objects"
                )
            left, right = left.align(right, join="outer", level=level, copy=False)

    elif isinstance(right, ABCSeries):
        # axis=1 is default for DataFrame-with-Series op
        axis = 1 if axis is None else left._get_axis_number(axis)
        left, right = left.align(
            right, join="outer", axis=axis, level=level, copy=False
        )
        right = _maybe_align_series_as_frame(left, right, axis)

    return left, right
def _should_reindex_frame_op(
    left: "DataFrame", right, op, axis, default_axis, fill_value, level
) -> bool:
    """
    Decide whether a DataFrame-with-DataFrame operation should operate on
    the shared columns only and reindex afterwards.

    Returns True only when `right` is a DataFrame, `op` is not pow/rpow,
    no `fill_value` or `level` is given, `axis` is the default axis, and
    the unique columns of the two frames differ.
    """
    assert isinstance(left, ABCDataFrame)

    # GH#32685 pow has special semantics for operating with null values
    is_pow = op is operator.pow or op is rpow
    if is_pow or not isinstance(right, ABCDataFrame):
        return False

    # NOTE: `axis` is compared with `default_axis` by identity, not equality.
    if fill_value is not None or level is not None or axis is not default_axis:
        # TODO: any other cases we should handle here?
        return False

    shared = left.columns.intersection(right.columns)

    # Intersection is always unique so we have to check the unique columns
    left_uniques = left.columns.unique()
    right_uniques = right.columns.unique()
    return not (shared.equals(left_uniques) and shared.equals(right_uniques))
def _frame_arith_method_with_reindex(
    left: "DataFrame", right: "DataFrame", op
) -> "DataFrame":
    """
    Apply `op` to the columns shared by two DataFrames, then reindex the
    result to the union of their columns.

    Parameters
    ----------
    left : DataFrame
    right : DataFrame
    op : binary operator

    Returns
    -------
    DataFrame
    """
    # GH#31623, only operate on shared columns
    _, left_positions, right_positions = left.columns.join(
        right.columns, how="inner", level=None, return_indexers=True
    )
    result = op(left.iloc[:, left_positions], right.iloc[:, right_positions])

    # Do the join on the columns instead of using _align_method_FRAME
    #  to avoid constructing two potentially large/sparse DataFrames
    join_columns, _, _ = left.columns.join(
        right.columns, how="outer", level=None, return_indexers=True
    )

    if not result.columns.has_duplicates:
        return result.reindex(join_columns, axis=1)

    # Avoid reindexing with a duplicate axis.
    # https://github.com/pandas-dev/pandas/issues/35194
    indexer, _ = result.columns.get_indexer_non_unique(join_columns)
    indexer = algorithms.unique1d(indexer)
    return result._reindex_with_indexers(
        {1: [join_columns, indexer]}, allow_dups=True
    )
def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int):
"""
If the Series operand is not EA-dtype, we can broadcast to 2D and operate
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype == "datetime64[ns]" or rvalues.dtype == "timedelta64[ns]":
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
else:
return series
if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, frame.shape)
return type(frame)(rvalues, index=frame.index, columns=frame.columns)
def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool):
    """
    Build the DataFrame arithmetic method for `op`, either as a special
    (dunder) method or as a flex method depending on `special`.
    """
    # This is the only function where `special` can be either True or False
    op_name = _get_op_name(op, special)
    default_axis = _get_frame_op_default_axis(op_name)

    na_op = get_array_op(op)

    # i.e. flex names such as "add" get the flex doc, "__add__" does not
    doc = (
        _make_flex_doc(op_name, "dataframe")
        if op_name in _op_descriptions
        else _arith_doc_FRAME % op_name
    )

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if _should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        ):
            return _frame_arith_method_with_reindex(self, other, op)

        if isinstance(other, ABCSeries) and fill_value is not None:
            # TODO: We could allow this in cases where we end up going
            #  through the DataFrame path
            raise NotImplementedError(f"fill_value {fill_value} not supported.")

        axis = 1 if axis is None else self._get_axis_number(axis)

        # TODO: why are we passing flex=True instead of flex=not special?
        #  15 tests fail if we pass flex=not special instead
        self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            return self._construct_result(self._combine_frame(other, na_op, fill_value))

        if isinstance(other, ABCSeries):
            return self._construct_result(
                dispatch_to_series(self, other, op, axis=axis)
            )

        # in this case we always have `np.ndim(other) == 0`
        if fill_value is not None:
            self = self.fillna(fill_value)
        return self._construct_result(dispatch_to_series(self, other, op))

    f.__name__ = op_name
    return f
def _flex_comp_method_FRAME(cls: Type["DataFrame"], op, special: bool):
    """
    Build the flex (non-dunder) DataFrame comparison method for `op`.
    """
    assert not special  # "special" also means "not flex"
    op_name = _get_op_name(op, special)
    default_axis = _get_frame_op_default_axis(op_name)
    assert default_axis == "columns", default_axis  # because we are not "special"

    description = _op_descriptions[op_name]["desc"]
    doc = _flex_comp_doc_FRAME.format(op_name=op_name, desc=description)

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None):
        axis = 1 if axis is None else self._get_axis_number(axis)

        self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)

        return self._construct_result(dispatch_to_series(self, other, op, axis=axis))

    f.__name__ = op_name
    return f
def _comp_method_FRAME(cls: Type["DataFrame"], op, special: bool):
    """
    Build the special (dunder) DataFrame comparison method for `op`.

    Operands are aligned with ``flex=False``.
    """
    assert special  # "special" also means "not flex"
    op_name = _get_op_name(op, special)

    @Appender(f"Wrapper for comparison method {op_name}")
    def f(self, other):
        axis = 1  # only relevant for Series other case

        self, other = _align_method_FRAME(self, other, axis, flex=False, level=None)

        # See GH#4537 for discussion of scalar op behavior
        return self._construct_result(dispatch_to_series(self, other, op, axis=axis))

    f.__name__ = op_name
    return f