# Source code for bigframes.operations.datetimes
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import datetime as dt
from typing import Generic, Literal, Optional, TypeVar
import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike
import bigframes_vendored.pandas.core.indexes.accessor as vendordt
import pandas
from bigframes import dataframe, dtypes, series
from bigframes._tools import docs
import bigframes.core.col
import bigframes.core.indexes.base as indices
from bigframes.core.logging import log_adapter
import bigframes.operations as ops
# Fixed durations used as divisors / moduli by the timedelta accessors
# (`days`, `seconds`, `microseconds`, `total_seconds`) defined below.
_ONE_DAY = pandas.Timedelta("1D")
_ONE_SECOND = pandas.Timedelta("1s")
_ONE_MICRO = pandas.Timedelta("1us")
# Frequency strings accepted by `floor`; any other value raises ValueError.
_SUPPORTED_FREQS = ("Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us")
# The kinds of object an accessor may wrap. Accessor members are annotated to
# return the same kind of object they were given.
T = TypeVar("T", series.Series, indices.Index, bigframes.core.col.Expression)
# Base accessor with the datetime / timedelta members shared by every wrapped
# type in T. It intentionally leaves out isocalendar, unit and tz, which the
# Series-only DatetimeMethods subclass provides.
#
# NOTE(review): members are documented with `#` comments rather than
# docstrings, matching the rest of this file; the subclass is decorated with
# `docs.inherit_docs`, and how that decorator treats pre-existing docstrings
# is not visible here -- confirm before adding any.
class DatetimeSimpleMethods(Generic[T]):
    def __init__(self, data: T):
        # The wrapped object. Every member below delegates to its
        # `_apply_unary_op` / `_apply_binary_op` method.
        self._data: T = data

    # ------------------------------------------------------------------
    # Date accessors
    #
    # Each property forwards one predefined op from `bigframes.operations`
    # to `_apply_unary_op` and returns whatever that call returns.
    # ------------------------------------------------------------------
    @property
    def day(self) -> T:
        return self._data._apply_unary_op(ops.day_op)

    @property
    def dayofweek(self) -> T:
        return self._data._apply_unary_op(ops.dayofweek_op)

    @property
    def day_of_week(self) -> T:
        # Alias of `dayofweek`.
        return self.dayofweek

    @property
    def weekday(self) -> T:
        # Alias of `dayofweek`.
        return self.dayofweek

    @property
    def dayofyear(self) -> T:
        return self._data._apply_unary_op(ops.dayofyear_op)

    @property
    def day_of_year(self) -> T:
        # Alias of `dayofyear`.
        return self.dayofyear

    @property
    def date(self) -> T:
        return self._data._apply_unary_op(ops.date_op)

    @property
    def quarter(self) -> T:
        return self._data._apply_unary_op(ops.quarter_op)

    @property
    def year(self) -> T:
        return self._data._apply_unary_op(ops.year_op)

    @property
    def month(self) -> T:
        return self._data._apply_unary_op(ops.month_op)

    # ------------------------------------------------------------------
    # Time accessors (same delegation pattern as the date accessors)
    # ------------------------------------------------------------------
    @property
    def hour(self) -> T:
        return self._data._apply_unary_op(ops.hour_op)

    @property
    def minute(self) -> T:
        return self._data._apply_unary_op(ops.minute_op)

    @property
    def second(self) -> T:
        return self._data._apply_unary_op(ops.second_op)

    @property
    def time(self) -> T:
        return self._data._apply_unary_op(ops.time_op)

    # ------------------------------------------------------------------
    # Timedelta accessors
    #
    # Each one first calls `_check_dtype(dtypes.TIMEDELTA_DTYPE)`, which may
    # raise TypeError, then combines the data with a fixed duration.
    # ------------------------------------------------------------------
    @property
    def days(self) -> T:
        # Floor-divide by one day.
        self._check_dtype(dtypes.TIMEDELTA_DTYPE)
        return self._data._apply_binary_op(_ONE_DAY, ops.floordiv_op)

    @property
    def seconds(self) -> T:
        # Remainder modulo one day, floor-divided by one second.
        self._check_dtype(dtypes.TIMEDELTA_DTYPE)
        return self._data._apply_binary_op(_ONE_DAY, ops.mod_op) // _ONE_SECOND  # type: ignore

    @property
    def microseconds(self) -> T:
        # Remainder modulo one second, floor-divided by one microsecond.
        self._check_dtype(dtypes.TIMEDELTA_DTYPE)
        return self._data._apply_binary_op(_ONE_SECOND, ops.mod_op) // _ONE_MICRO  # type: ignore

    def total_seconds(self) -> T:
        # Divide by one second using `ops.div_op` (not the floor-division op
        # used by `days`).
        self._check_dtype(dtypes.TIMEDELTA_DTYPE)
        return self._data._apply_binary_op(_ONE_SECOND, ops.div_op)

    def _check_dtype(self, target_dtype: dtypes.Dtype):
        # Raise TypeError when the wrapped object is an Index or a Series
        # whose dtype differs from `target_dtype`. Any other wrapped object
        # is not inspected and passes silently.
        if not isinstance(self._data, (indices.Index, series.Series)):
            return
        actual_dtype = self._data.dtype
        if actual_dtype != target_dtype:
            raise TypeError(
                f"Expect dtype: {target_dtype}, but got {actual_dtype}"
            )

    # ------------------------------------------------------------------
    # Conversion / formatting methods
    # ------------------------------------------------------------------
    def tz_localize(self, tz: Literal["UTC"] | None) -> T:
        # "UTC" applies `ops.ToTimestampOp()`, None applies
        # `ops.ToDatetimeOp()`; any other value raises ValueError.
        if tz == "UTC":
            return self._data._apply_unary_op(ops.ToTimestampOp())
        elif tz is None:
            return self._data._apply_unary_op(ops.ToDatetimeOp())
        else:
            raise ValueError(f"Unsupported timezone {tz}")

    def day_name(self) -> T:
        # Implemented as `strftime` with the "%A" directive.
        return self.strftime("%A")

    def strftime(self, date_format: str) -> T:
        format_op = ops.StrftimeOp(date_format=date_format)
        return self._data._apply_unary_op(format_op)

    def normalize(self) -> T:
        return self._data._apply_unary_op(ops.normalize_op)

    def floor(self, freq: str) -> T:
        # Only the frequencies listed in `_SUPPORTED_FREQS` are accepted;
        # anything else raises ValueError before any op is built.
        if freq in _SUPPORTED_FREQS:
            return self._data._apply_unary_op(ops.FloorDtOp(freq=freq))  # type: ignore
        raise ValueError(f"freq must be one of {_SUPPORTED_FREQS}")
# This is the accessor used by series.dt, and the one that shows up in the
# reference docs. It extends DatetimeSimpleMethods with the Series-only
# members `tz`, `unit` and `isocalendar`.
#
# NOTE(review): members are documented with `#` comments rather than
# docstrings because the class is decorated with `docs.inherit_docs`; how
# that decorator treats pre-existing docstrings is not visible here --
# confirm before adding any.
@log_adapter.class_logger
@docs.inherit_docs(vendordt.DatetimeProperties)
@docs.inherit_docs(vendored_pandas_datetimelike.DatelikeOps)
class DatetimeMethods(DatetimeSimpleMethods[series.Series]):
    def __init__(self, data: series.Series):
        # Only forwards to the base initializer; the override narrows the
        # accepted argument type to Series.
        super().__init__(data)

    @property
    def tz(self) -> Optional[dt.timezone]:
        # Maps the timezone recorded on the dtype to a `datetime.timezone`:
        # "UTC" -> `dt.timezone.utc`, None -> None, anything else raises
        # ValueError.
        # Assumption: pyarrow dtype
        tz_string = self._data._dtype.pyarrow_dtype.tz
        if tz_string == "UTC":
            return dt.timezone.utc
        if tz_string is None:
            return None
        raise ValueError(f"Unexpected timezone {tz_string}")

    @property
    def unit(self) -> str:
        # Returns the `unit` attribute of the underlying pyarrow dtype.
        # Assumption: pyarrow dtype
        return self._data._dtype.pyarrow_dtype.unit

    def isocalendar(self) -> dataframe.DataFrame:
        # Builds a DataFrame with columns labelled "year", "week" and "day"
        # by projecting the three ISO ops over this Series' value column.
        iso_ops = [ops.iso_year_op, ops.iso_week_op, ops.iso_day_op]
        labels = pandas.Index(["year", "week", "day"])
        value_column = self._data._value_column
        # NOTE(review): `drop=True` presumably discards the block's existing
        # value column(s) so only the projected ones remain -- confirm
        # against `project_exprs`.
        block = self._data._block.project_exprs(
            [iso_op.as_expr(value_column) for iso_op in iso_ops], labels, drop=True
        )
        return dataframe.DataFrame(block)