# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import json
from typing import Mapping, Optional, Union
import shapely # type: ignore
from bigframes import operations as ops
import bigframes.dataframe
import bigframes.geopandas
import bigframes.series
"""
Geography functions defined from
https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions
"""
[docs]
def st_area(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
) -> bigframes.series.Series:
    """
    Computes the area, in square meters, covered by the polygons of each
    input `GEOGRAPHY`.

    A point or a line has zero area. For a collection, the result is the
    area of the polygons in the collection; if the collection doesn't
    contain polygons, the result is zero.

    .. note::
        BigQuery's Geography functions, like `st_area`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes.geopandas
        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> from shapely.geometry import Polygon, LineString, Point

        >>> series = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0.0, 0.0), (0.1, 0.1), (0.0, 0.1)]),
        ...         Polygon([(0.10, 0.4), (0.9, 0.5), (0.10, 0.5)]),
        ...         Polygon([(0.1, 0.1), (0.2, 0.1), (0.2, 0.2)]),
        ...         LineString([(0, 0), (1, 1), (0, 1)]),
        ...         Point(0, 1),
        ...     ]
        ... )
        >>> series
        0              POLYGON ((0 0, 0.1 0.1, 0 0.1, 0 0))
        1    POLYGON ((0.1 0.4, 0.9 0.5, 0.1 0.5, 0.1 0.4))
        2    POLYGON ((0.1 0.1, 0.2 0.1, 0.2 0.2, 0.1 0.1))
        3                        LINESTRING (0 0, 1 1, 0 1)
        4                                       POINT (0 1)
        dtype: geometry

        >>> bbq.st_area(series)
        0    61821689.855985
        1    494563347.88721
        2    61821689.855841
        3                0.0
        4                0.0
        dtype: Float64

    Use `round()` to round the outputted areas to the nearest ten million:

        >>> bbq.st_area(series).round(-7)
        0     60000000.0
        1    490000000.0
        2     60000000.0
        3            0.0
        4            0.0
        dtype: Float64

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.

    Returns:
        bigframes.pandas.Series:
            Series of float representing the areas.
    """
    areas = series._apply_unary_op(ops.geo_area_op)
    # Clear the name so the result is returned unnamed.
    areas.name = None
    return areas
[docs]
def st_buffer(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    buffer_radius: float,
    num_seg_quarter_circle: float = 8.0,
    use_spheroid: bool = False,
) -> bigframes.series.Series:
    """
    Computes, for each input `GEOGRAPHY`, a `GEOGRAPHY` that represents all
    points whose distance from the input is less than or equal to
    `buffer_radius` meters.

    .. note::
        BigQuery's Geography functions, like `st_buffer`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes.geopandas
        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> from shapely.geometry import Point

        >>> series = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Point(0, 0),
        ...         Point(1, 1),
        ...     ]
        ... )
        >>> series
        0    POINT (0 0)
        1    POINT (1 1)
        dtype: geometry

        >>> buffer = bbq.st_buffer(series, 100)
        >>> bbq.st_area(buffer) > 0
        0    True
        1    True
        dtype: boolean

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.
        buffer_radius (float):
            The buffer distance in meters.
        num_seg_quarter_circle (float, optional):
            The number of segments used to approximate a quarter circle.
            The default value is 8.0.
        use_spheroid (bool, optional):
            Determines how this function measures distance. If use_spheroid is
            FALSE, the function measures distance on the surface of a perfect
            sphere. The use_spheroid parameter currently only supports the
            value FALSE. The default value of use_spheroid is FALSE.

    Returns:
        bigframes.pandas.Series:
            A series of geography objects representing the buffered geometries.
    """
    buffer_op = ops.GeoStBufferOp(
        buffer_radius=buffer_radius,
        num_seg_quarter_circle=num_seg_quarter_circle,
        use_spheroid=use_spheroid,
    )
    buffered = series._apply_unary_op(buffer_op)
    # Clear the name so the result is returned unnamed.
    buffered.name = None
    return buffered
[docs]
def st_centroid(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
) -> bigframes.series.Series:
    """
    Computes the geometric centroid of each `GEOGRAPHY` in the input.

    How the centroid is defined depends on the input type:

    * `POINT` and `MULTIPOINT`: the arithmetic mean of the input coordinates.
    * `LINESTRING` and `POLYGON`: the center of mass.
    * `GEOMETRYCOLLECTION`: the center of mass of the collection's elements.

    .. note::
        BigQuery's Geography functions, like `st_centroid`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes.geopandas
        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> from shapely.geometry import Polygon, LineString, Point

        >>> series = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0.0, 0.0), (0.1, 0.1), (0.0, 0.1)]),
        ...         LineString([(0, 0), (1, 1), (0, 1)]),
        ...         Point(0, 1),
        ...     ]
        ... )
        >>> series
        0    POLYGON ((0 0, 0.1 0.1, 0 0.1, 0 0))
        1              LINESTRING (0 0, 1 1, 0 1)
        2                             POINT (0 1)
        dtype: geometry

        >>> bbq.st_centroid(series)
        0    POINT (0.03333 0.06667)
        1    POINT (0.49998 0.70712)
        2                POINT (0 1)
        dtype: geometry

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.

    Returns:
        bigframes.pandas.Series:
            A series of geography objects representing the centroids.
    """
    centroids = series._apply_unary_op(ops.geo_st_centroid_op)
    # Clear the name so the result is returned unnamed.
    centroids.name = None
    return centroids
[docs]
def st_convexhull(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
) -> bigframes.series.Series:
    """
    Computes the convex hull of each `GEOGRAPHY` in the input.

    The convex hull is the smallest convex set that contains all of the
    points in the input `GEOGRAPHY`.

    .. note::
        BigQuery's Geography functions, like `st_convexhull`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes.geopandas
        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> from shapely.geometry import Polygon, LineString, Point

        >>> series = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0.0, 0.0), (0.1, 0.1), (0.0, 0.1)]),
        ...         LineString([(0, 0), (1, 1), (0, 1)]),
        ...         Point(0, 1),
        ...     ]
        ... )
        >>> series
        0    POLYGON ((0 0, 0.1 0.1, 0 0.1, 0 0))
        1              LINESTRING (0 0, 1 1, 0 1)
        2                             POINT (0 1)
        dtype: geometry

        >>> bbq.st_convexhull(series)
        0    POLYGON ((0 0, 0.1 0.1, 0 0.1, 0 0))
        1          POLYGON ((0 0, 1 1, 0 1, 0 0))
        2                             POINT (0 1)
        dtype: geometry

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.

    Returns:
        bigframes.pandas.Series:
            A series of geography objects representing the convex hulls.
    """
    hulls = series._apply_unary_op(ops.geo_st_convexhull_op)
    # Clear the name so the result is returned unnamed.
    hulls.name = None
    return hulls
[docs]
def st_difference(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    other: Union[
        bigframes.series.Series,
        bigframes.geopandas.GeoSeries,
        shapely.geometry.base.BaseGeometry,
    ],
) -> bigframes.series.Series:
    """
    Returns a `GEOGRAPHY` that represents the point set difference of each
    geography in ``series`` and the corresponding geography in ``other``.
    The result therefore consists of the part of the first geography that
    doesn't intersect with the second.

    If the first geography is completely contained in the second, then
    `ST_DIFFERENCE` returns an empty `GEOGRAPHY`.

    .. note::
        BigQuery's Geography functions, like `st_difference`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> import bigframes.geopandas
        >>> from shapely.geometry import Polygon, LineString, Point

    We can check two GeoSeries against each other, row by row:

        >>> s1 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
        ...         LineString([(0, 0), (2, 2)]),
        ...         LineString([(2, 0), (0, 2)]),
        ...         Point(0, 1),
        ...     ],
        ... )
        >>> s2 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
        ...         LineString([(1, 0), (1, 3)]),
        ...         LineString([(2, 0), (0, 2)]),
        ...         Point(1, 1),
        ...         Point(0, 1),
        ...     ],
        ...     index=range(1, 6),
        ... )

        >>> s1
        0    POLYGON ((0 0, 2 2, 0 2, 0 0))
        1    POLYGON ((0 0, 2 2, 0 2, 0 0))
        2             LINESTRING (0 0, 2 2)
        3             LINESTRING (2 0, 0 2)
        4                       POINT (0 1)
        dtype: geometry

        >>> s2
        1    POLYGON ((0 0, 1 1, 0 1, 0 0))
        2             LINESTRING (1 0, 1 3)
        3             LINESTRING (2 0, 0 2)
        4                       POINT (1 1)
        5                       POINT (0 1)
        dtype: geometry

        >>> bbq.st_difference(s1, s2)
        0                                               None
        1    POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1))
        2                   LINESTRING (0 0, 1 1.00046, 2 2)
        3                           GEOMETRYCOLLECTION EMPTY
        4                                        POINT (0 1)
        5                                               None
        dtype: geometry

    Additionally, we can check the difference of a GeoSeries against a single
    shapely geometry:

        >>> polygon = Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
        >>> bbq.st_difference(s1, polygon)
        0    POLYGON ((1.97082 2.00002, 0 2, 0 0, 1.97082 2...
        1    POLYGON ((1.97082 2.00002, 0 2, 0 0, 1.97082 2...
        2                             GEOMETRYCOLLECTION EMPTY
        3                    LINESTRING (0.99265 1.00781, 0 2)
        4                                          POINT (0 1)
        dtype: geometry

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.
        other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
            The series or geometric object to subtract from the geography
            objects in ``series``.

    Returns:
        bigframes.pandas.Series:
            A series of the points in each aligned geometry that are not
            in ``other``.
    """
    difference = series._apply_binary_op(other, ops.geo_st_difference_op)
    return difference
[docs]
def st_distance(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    other: Union[
        bigframes.series.Series,
        bigframes.geopandas.GeoSeries,
        shapely.geometry.base.BaseGeometry,
    ],
    *,
    use_spheroid: bool = False,
) -> bigframes.series.Series:
    """
    Returns the shortest distance in meters between two non-empty
    ``GEOGRAPHY`` objects.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> import bigframes.geopandas
        >>> from shapely.geometry import Polygon, LineString, Point

    We can check two GeoSeries against each other, row by row:

        >>> s1 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Point(0, 0),
        ...         Point(0.00001, 0),
        ...         Point(0.00002, 0),
        ...     ],
        ... )
        >>> s2 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Point(0.00001, 0),
        ...         Point(0.00003, 0),
        ...         Point(0.00005, 0),
        ...     ],
        ... )

        >>> bbq.st_distance(s1, s2, use_spheroid=True)
        0    1.113195
        1     2.22639
        2    3.339585
        dtype: Float64

    We can also calculate the distance between each geometry and a single
    shapely geometry:

        >>> bbq.st_distance(s2, Point(0.00001, 0))
        0         0.0
        1    2.223902
        2    4.447804
        dtype: Float64

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.
        other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
            The series or geometric object whose distance in meters from
            the geography objects in ``series`` is calculated.
        use_spheroid (optional, default ``False``):
            Determines how this function measures distance. If ``use_spheroid``
            is False, the function measures distance on the surface of a perfect
            sphere. If ``use_spheroid`` is True, the function measures distance
            on the surface of the `WGS84 spheroid
            <https://cloud.google.com/bigquery/docs/geospatial-data>`_. The
            default value of ``use_spheroid`` is False.

    Returns:
        bigframes.pandas.Series:
            The Series (elementwise) of the smallest distance between
            each aligned geometry with other.
    """
    distance_op = ops.GeoStDistanceOp(use_spheroid=use_spheroid)
    return series._apply_binary_op(other, distance_op)
[docs]
def st_intersection(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    other: Union[
        bigframes.series.Series,
        bigframes.geopandas.GeoSeries,
        shapely.geometry.base.BaseGeometry,
    ],
) -> bigframes.series.Series:
    """
    Returns a `GEOGRAPHY` that represents the point set intersection of each
    geography in ``series`` and the corresponding geography in ``other``.
    Thus, every point in the intersection appears in both input geographies.

    .. note::
        BigQuery's Geography functions, like `st_intersection`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> import bigframes.geopandas
        >>> from shapely.geometry import Polygon, LineString, Point

    We can check two GeoSeries against each other, row by row:

        >>> s1 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
        ...         LineString([(0, 0), (2, 2)]),
        ...         LineString([(2, 0), (0, 2)]),
        ...         Point(0, 1),
        ...     ],
        ... )
        >>> s2 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
        ...         LineString([(1, 0), (1, 3)]),
        ...         LineString([(2, 0), (0, 2)]),
        ...         Point(1, 1),
        ...         Point(0, 1),
        ...     ],
        ...     index=range(1, 6),
        ... )

        >>> s1
        0    POLYGON ((0 0, 2 2, 0 2, 0 0))
        1    POLYGON ((0 0, 2 2, 0 2, 0 0))
        2             LINESTRING (0 0, 2 2)
        3             LINESTRING (2 0, 0 2)
        4                       POINT (0 1)
        dtype: geometry

        >>> s2
        1    POLYGON ((0 0, 1 1, 0 1, 0 0))
        2             LINESTRING (1 0, 1 3)
        3             LINESTRING (2 0, 0 2)
        4                       POINT (1 1)
        5                       POINT (0 1)
        dtype: geometry

        >>> bbq.st_intersection(s1, s2)
        0                                    None
        1    POLYGON ((0 0, 0.99954 1, 0 1, 0 0))
        2                       POINT (1 1.00046)
        3                   LINESTRING (2 0, 0 2)
        4                GEOMETRYCOLLECTION EMPTY
        5                                    None
        dtype: geometry

    We can also do the intersection of each geometry and a single shapely
    geometry:

        >>> bbq.st_intersection(s1, Polygon([(0, 0), (1, 1), (0, 1)]))
        0    POLYGON ((0 0, 0.99954 1, 0 1, 0 0))
        1    POLYGON ((0 0, 0.99954 1, 0 1, 0 0))
        2             LINESTRING (0 0, 0.99954 1)
        3                GEOMETRYCOLLECTION EMPTY
        4                             POINT (0 1)
        dtype: geometry

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.
        other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
            The series or geometric object to intersect with the geography
            objects in ``series``.

    Returns:
        bigframes.pandas.Series:
            The series (elementwise) of the intersection of points in
            each aligned geometry with other.
    """
    intersection = series._apply_binary_op(other, ops.geo_st_intersection_op)
    return intersection
[docs]
def st_isclosed(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
) -> bigframes.series.Series:
    """
    Returns TRUE for a non-empty Geography in which every element has an
    empty boundary.

    .. note::
        BigQuery's Geography functions, like `st_isclosed`, interpret the geometry
        data type as a point set on the Earth's surface. A point set is a set
        of points, lines, and polygons on the WGS84 reference spheroid, with
        geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data

    **Examples:**

        >>> import bigframes.geopandas
        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> from shapely.geometry import Point, LineString, Polygon

        >>> series = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Point(0, 0),  # Point
        ...         LineString([(0, 0), (1, 1)]),  # Open LineString
        ...         LineString([(0, 0), (1, 1), (0, 1), (0, 0)]),  # Closed LineString
        ...         Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]),
        ...         None,
        ...     ]
        ... )
        >>> series
        0                        POINT (0 0)
        1              LINESTRING (0 0, 1 1)
        2    LINESTRING (0 0, 1 1, 0 1, 0 0)
        3     POLYGON ((0 0, 1 1, 0 1, 0 0))
        4                               None
        dtype: geometry

        >>> bbq.st_isclosed(series)
        0     True
        1    False
        2     True
        3    False
        4     <NA>
        dtype: boolean

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.

    Returns:
        bigframes.pandas.Series:
            Series of booleans indicating whether each geometry is closed.
    """
    closed_flags = series._apply_unary_op(ops.geo_st_isclosed_op)
    # Clear the name so the result is returned unnamed.
    closed_flags.name = None
    return closed_flags
[docs]
def st_length(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    *,
    use_spheroid: bool = False,
) -> bigframes.series.Series:
    """Returns the total length in meters of the lines in the input GEOGRAPHY.

    A series element that is a point or a polygon yields zero for that row.
    A series element that is a collection yields the length of the lines
    in the collection; if the collection doesn't contain lines, the result
    is zero.

    The optional use_spheroid parameter determines how this function
    measures distance. If use_spheroid is FALSE, the function measures
    distance on the surface of a perfect sphere.

    The use_spheroid parameter currently only supports the value FALSE. The
    default value of use_spheroid is FALSE. See:
    https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_length

    **Examples:**

        >>> import bigframes.geopandas
        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> from shapely.geometry import Polygon, LineString, Point, GeometryCollection

        >>> series = bigframes.geopandas.GeoSeries(
        ...     [
        ...         LineString([(0, 0), (1, 0)]),  # Length will be approx 1 degree in meters
        ...         Polygon([(0.0, 0.0), (0.1, 0.1), (0.0, 0.1)]),  # Length is 0
        ...         Point(0, 1),  # Length is 0
        ...         GeometryCollection([LineString([(0,0),(0,1)]), Point(1,1)])  # Length of LineString only
        ...     ]
        ... )

        >>> result = bbq.st_length(series)
        >>> result
        0    111195.101177
        1              0.0
        2              0.0
        3    111195.101177
        dtype: Float64

    Args:
        series (bigframes.series.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.
        use_spheroid (bool, optional):
            Determines how this function measures distance.
            If FALSE (default), measures distance on a perfect sphere.
            Currently, only FALSE is supported.

    Returns:
        bigframes.series.Series:
            Series of floats representing the lengths in meters.
    """
    length_op = ops.GeoStLengthOp(use_spheroid=use_spheroid)
    lengths = series._apply_unary_op(length_op)
    # Clear the name so the result is returned unnamed.
    lengths.name = None
    return lengths
[docs]
def st_regionstats(
    geography: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    raster_id: str,
    band: Optional[str] = None,
    include: Optional[str] = None,
    options: Optional[Mapping[str, Union[str, int, float]]] = None,
) -> bigframes.series.Series:
    """Returns statistics summarizing the pixel values of the raster image
    referenced by raster_id that intersect with geography.

    The statistics include the count, minimum, maximum, sum, standard
    deviation, mean, and area of the valid pixels of the raster band named
    by ``band``. Google Earth Engine computes the results of the function
    call.

    See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_regionstats

    Args:
        geography (bigframes.series.Series | bigframes.geopandas.GeoSeries):
            A series of geography objects to intersect with the raster image.
        raster_id (str):
            A string that identifies a raster image. The following formats
            are supported:

            * A URI from an image table provided by Google Earth Engine
              in BigQuery sharing (formerly Analytics Hub).
            * A URI for a readable GeoTIFF raster file.
            * A Google Earth Engine asset path that references public
              catalog data or project-owned assets with read access.
        band (Optional[str]):
            A string in one of the following formats:

            * A single band within the raster image specified by raster_id.
            * A formula to compute a value from the available bands in the
              raster image. The formula uses the Google Earth Engine image
              expression syntax. Bands can be referenced by their name in
              expressions.

            If you don't specify a band, the first band of the image is used.
        include (Optional[str]):
            An optional string formula that uses the Google Earth Engine image
            expression syntax to compute a pixel weight. The formula should
            return values from 0 to 1. Values outside this range are set to the
            nearest limit, either 0 or 1. A value of 0 means that the pixel is
            invalid and it's excluded from analysis. A positive value means that
            a pixel is valid. Values between 0 and 1 represent proportional
            weights for calculations, such as weighted means.
        options (Mapping[str, Union[str, int, float]], optional):
            A dictionary of options to pass to the function. See the BigQuery
            documentation for a list of available options.

    Returns:
        bigframes.pandas.Series:
            A STRUCT Series containing the computed statistics.
    """
    # The op receives the options as a JSON string; a missing or empty
    # mapping is passed through as None rather than being encoded.
    options_json = None if not options else json.dumps(options)
    stats_op = ops.GeoStRegionStatsOp(
        raster_id=raster_id,
        band=band,
        include=include,
        options=options_json,
    )
    return geography._apply_unary_op(stats_op)
[docs]
def st_simplify(
    geography: "bigframes.series.Series",
    tolerance_meters: float,
) -> "bigframes.series.Series":
    """Returns a simplified version of the input geography.

    Args:
        geography (bigframes.series.Series):
            A Series containing GEOGRAPHY data.
        tolerance_meters (float):
            A float64 value indicating the tolerance in meters.

    Returns:
        bigframes.series.Series:
            A Series containing the simplified GEOGRAPHY data.
    """
    simplify_op = ops.GeoStSimplifyOp(tolerance_meters=tolerance_meters)
    return geography._apply_unary_op(simplify_op)