Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions packages/bigframes/bigframes/core/array_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,17 @@ def filter(self, predicate: ex.Expression):
return arr.drop_columns(filter_ids)

def order_by(
self, by: Sequence[OrderingExpression], is_total_order: bool = False
self,
by: Sequence[OrderingExpression],
is_total_order: bool = False,
stable: bool = True,
) -> ArrayValue:
return ArrayValue(
nodes.OrderByNode(
child=self.node, by=tuple(by), is_total_order=is_total_order
child=self.node,
by=tuple(by),
is_total_order=is_total_order,
stable=stable,
)
)

Expand Down
3 changes: 2 additions & 1 deletion packages/bigframes/bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,10 @@ def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]:
def order_by(
self,
by: typing.Sequence[ordering.OrderingExpression],
stable: bool = True,
) -> Block:
return Block(
self._expr.order_by(by),
self._expr.order_by(by, stable=stable),
index_columns=self.index_columns,
column_labels=self.column_labels,
index_labels=self.index.names,
Expand Down
19 changes: 11 additions & 8 deletions packages/bigframes/bigframes/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,6 @@ def query_job(self) -> bigquery.QueryJob:
self._query_job = query_job
return self._query_job

@property
def str(self) -> bigframes.operations.strings.StringMethods:
import bigframes.operations.strings

return bigframes.operations.strings.StringMethods(self)

def get_loc(self, key) -> typing.Union[int, slice, "bigframes.series.Series"]:
"""Get integer location, slice or boolean mask for requested label.

Expand Down Expand Up @@ -436,7 +430,8 @@ def sort_values(
*,
inplace: bool = False,
ascending: bool = True,
na_position: __builtins__.str = "last",
kind: str | None = None,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat use of an existing pandas parameter! (https://pandas.pydata.org/docs/reference/api/pandas.Series.sort_values.html#pandas.Series.sort_values)

Interesting opportunity here too with regards to the default in pandas not being a stable sort.

na_position: str = "last",
) -> Index:
if na_position not in ["first", "last"]:
raise ValueError("Param na_position must be one of 'first' or 'last'")
Expand All @@ -448,7 +443,8 @@ def sort_values(
else order.descending_over(column, na_last)
for column in index_columns
]
return Index(self._block.order_by(ordering))
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
return Index(self._block.order_by(ordering, stable=is_stable))

def astype(
self,
Expand Down Expand Up @@ -840,6 +836,13 @@ def _apply_binary_op(
else:
return NotImplemented

# last so as to not shadow __builtins__.str
@property
def str(self) -> bigframes.operations.strings.StringMethods:
import bigframes.operations.strings

return bigframes.operations.strings.StringMethods(self)


def _should_create_datetime_index(block: blocks.Block) -> bool:
if len(block.index.dtypes) != 1:
Expand Down
3 changes: 2 additions & 1 deletion packages/bigframes/bigframes/core/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,8 @@ def remap_refs(
@dataclasses.dataclass(frozen=True, eq=False)
class OrderByNode(UnaryNode):
by: Tuple[OrderingExpression, ...]
# This is an optimization, if true, can discard previous orderings.
stable: bool = True
# This is an optimization, if true, can discard previous orderings, even if doing a stable sort
# might be a total ordering even if false
is_total_order: bool = False

Expand Down
7 changes: 6 additions & 1 deletion packages/bigframes/bigframes/core/rewrite/order.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def pull_up_order_inner(
child_result, child_order = pull_up_order_inner(node.child)
return child_result, child_order.with_reverse()
elif isinstance(node, bigframes.core.nodes.OrderByNode):
if node.is_total_order:
# unstable sorts don't care about previous order, total orders override previous order
if (not node.stable) or node.is_total_order:
new_node = remove_order(node.child)
else:
new_node, child_order = pull_up_order_inner(node.child)
Expand Down Expand Up @@ -106,6 +107,10 @@ def pull_up_order_inner(
),
)
)
elif not node.stable:
new_order = bigframes.core.ordering.RowOrdering(
ordering_value_columns=tuple(new_by),
)
else:
assert child_order
new_order = child_order.with_ordering_columns(new_by)
Expand Down
15 changes: 10 additions & 5 deletions packages/bigframes/bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2418,6 +2418,7 @@ def sort_index(
*,
ascending: bool = ...,
inplace: Literal[False] = ...,
kind: str = ...,
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
na_position: Literal["first", "last"] = ...,
) -> DataFrame: ...

Expand All @@ -2427,6 +2428,7 @@ def sort_index(
*,
ascending: bool = ...,
inplace: Literal[True] = ...,
kind: str = ...,
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
na_position: Literal["first", "last"] = ...,
) -> None: ...

Expand All @@ -2436,6 +2438,7 @@ def sort_index(
axis: Union[int, str] = 0,
ascending: bool = True,
inplace: bool = False,
kind: str | None = None,
na_position: Literal["first", "last"] = "last",
) -> Optional[DataFrame]:
if utils.get_axis_number(axis) == 0:
Expand All @@ -2449,7 +2452,8 @@ def sort_index(
else order.descending_over(column, na_last)
for column in index_columns
]
block = self._block.order_by(ordering)
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
block = self._block.order_by(ordering, stable=is_stable)
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
else: # axis=1
_, indexer = self.columns.sort_values(
return_indexer=True,
Expand All @@ -2472,7 +2476,7 @@ def sort_values(
*,
inplace: Literal[False] = ...,
ascending: bool | typing.Sequence[bool] = ...,
kind: str = ...,
kind: str | None = None,
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
na_position: typing.Literal["first", "last"] = ...,
) -> DataFrame: ...

Expand All @@ -2483,7 +2487,7 @@ def sort_values(
*,
inplace: Literal[True] = ...,
ascending: bool | typing.Sequence[bool] = ...,
kind: str = ...,
kind: str | None = None,
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
na_position: typing.Literal["first", "last"] = ...,
) -> None: ...

Expand All @@ -2493,7 +2497,7 @@ def sort_values(
*,
inplace: bool = False,
ascending: bool | typing.Sequence[bool] = True,
kind: str = "quicksort",
kind: str | None = None,
na_position: typing.Literal["first", "last"] = "last",
) -> Optional[DataFrame]:
if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)):
Expand Down Expand Up @@ -2525,7 +2529,8 @@ def sort_values(
if is_ascending
else order.descending_over(column_id, na_last)
)
block = self._block.order_by(ordering)
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
block = self._block.order_by(ordering, stable=is_stable)
if inplace:
self._set_block(block)
return None
Expand Down
41 changes: 32 additions & 9 deletions packages/bigframes/bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,7 @@ def sort_values(
axis=...,
inplace: Literal[True] = ...,
ascending: bool | typing.Sequence[bool] = ...,
kind: str = ...,
kind: str | None = ...,
na_position: typing.Literal["first", "last"] = ...,
) -> None: ...

Expand All @@ -1780,7 +1780,7 @@ def sort_values(
axis=...,
inplace: Literal[False] = ...,
ascending: bool | typing.Sequence[bool] = ...,
kind: str = ...,
kind: str | None = ...,
na_position: typing.Literal["first", "last"] = ...,
) -> Series: ...

Expand All @@ -1790,19 +1790,21 @@ def sort_values(
axis=0,
inplace: bool = False,
ascending=True,
kind: str = "quicksort",
kind: str | None = None,
na_position: typing.Literal["first", "last"] = "last",
) -> Optional[Series]:
if axis != 0 and axis != "index":
raise ValueError(f"No axis named {axis} for object type Series")
if na_position not in ["first", "last"]:
raise ValueError("Param na_position must be one of 'first' or 'last'")
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
block = self._block.order_by(
[
order.ascending_over(self._value_column, (na_position == "last"))
if ascending
else order.descending_over(self._value_column, (na_position == "last"))
],
stable=is_stable,
)
if inplace:
self._set_block(block)
Expand All @@ -1812,17 +1814,37 @@ def sort_values(

@typing.overload # type: ignore[override]
def sort_index(
self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
) -> Series: ...
self,
*,
axis=...,
inplace: Literal[False] = ...,
ascending=...,
kind: str | None = ...,
na_position=...,
) -> Series:
...

@typing.overload
def sort_index(
self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
) -> None: ...
self,
*,
axis=0,
inplace: Literal[True] = ...,
ascending=...,
kind: str | None = ...,
na_position=...,
) -> None:
...

@validations.requires_index
def sort_index(
self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
self,
*,
axis=0,
inplace: bool = False,
ascending=True,
kind: str | None = None,
na_position="last",
) -> Optional[Series]:
# TODO(tbergeron): Support level parameter once multi-index introduced.
if axis != 0 and axis != "index":
Expand All @@ -1837,7 +1859,8 @@ def sort_index(
else order.descending_over(column, na_last)
for column in block.index_columns
]
block = block.order_by(ordering)
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
Comment thread
TrevorBergeron marked this conversation as resolved.
Outdated
block = block.order_by(ordering, stable=is_stable)
if inplace:
self._set_block(block)
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@
"_deferred",
]
VALID_WRITE_ENGINES = typing.get_args(WriteEngineType)

DEFAULT_SORT_KIND = "stable"
Comment thread
TrevorBergeron marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -2253,7 +2253,7 @@ def sort_values(
*,
inplace: bool = False,
ascending: bool | Sequence[bool] = True,
kind: str = "quicksort",
kind: str | None = None,
na_position: Literal["first", "last"] = "last",
):
"""Sort by the values along row axis.
Expand Down Expand Up @@ -2339,7 +2339,7 @@ def sort_values(
the by.
inplace (bool, default False):
If True, perform operation in-place.
kind (str, default 'quicksort'):
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
Expand All @@ -2363,6 +2363,7 @@ def sort_index(
axis: str | int = 0,
ascending: bool = True,
inplace: bool = False,
kind: str | None = None,
na_position: Literal["first", "last"] = "last",
):
"""Sort object by labels (along an axis).
Expand All @@ -2375,6 +2376,10 @@ def sort_index(
Sort ascending vs. descending.
inplace (bool, default False):
Whether to modify the DataFrame rather than creating a new one.
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
na_position ({'first', 'last'}, default 'last'):
Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
Not implemented for MultiIndex.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,11 @@ def nunique(self) -> int:
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def sort_values(
self, *, ascending: bool = True, na_position: str = "last"
self,
*,
ascending: bool = True,
kind: str | None = None,
na_position: str = "last",
) -> Index:
"""
Return a sorted copy of the index.
Expand All @@ -851,6 +855,10 @@ def sort_values(
Args:
ascending (bool, default True):
Should the index values be sorted in an ascending order.
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
na_position ({'first' or 'last'}, default 'last'):
Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
the end.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1502,7 +1502,7 @@ def sort_values(
axis: Axis = 0,
inplace: bool = False,
ascending: bool | int | Sequence[bool] | Sequence[int] = True,
kind: str = "quicksort",
kind: str | None = None,
na_position: str = "last",
):
"""
Expand Down Expand Up @@ -1579,7 +1579,7 @@ def sort_values(
Whether to modify the Series rather than creating a new one.
ascending (bool or list of bools, default True):
If True, sort values in ascending order, otherwise descending.
kind (str, default to 'quicksort'):
kind (str, default to None):
Choice of sorting algorithm. Accepts quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder
Expand All @@ -1599,6 +1599,7 @@ def sort_index(
axis: Axis = 0,
inplace: bool = False,
ascending: bool | Sequence[bool] = True,
kind: str | None = None,
na_position: NaPosition = "last",
):
"""
Expand Down Expand Up @@ -1646,6 +1647,10 @@ def sort_index(
ascending (bool or list-like of bools, default True):
Sort ascending vs. descending. When the index is a MultiIndex the
sort direction can be controlled for each level individually.
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
na_position ({'first', 'last'}, default 'last'):
If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
Not implemented for MultiIndex.
Expand Down
Loading