# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Analyzing movie posters with BigQuery Dataframe AI functions#
Run in Colab
|
|
|
BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old
movie posters. These posters are images stored in a public Google Cloud Storage bucket: gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters
Set up#
Before you begin, you need to
Set up your permissions for generative AI functions with these instructions
Set up your Cloud Resource connection by following these instructions
Once you have the permissions set up, import the bigframes.pandas package, and
set your cloud project ID.
import bigframes.pandas as bpd
MY_PROJECT_ID = "bigframes-dev" # @param {type:"string"}
LOCATION = "us" # @param {type:"string"}
bpd.options.bigquery.project = MY_PROJECT_ID
bpd.options.bigquery.location = LOCATION
Load data#
First, you load the data from the GCS bucket to a BigQuery Dataframe:
# Replace with your own connection name.
MY_CONNECTION = 'bigframes-default-connection' # @param {type:"string"}
FULL_CONNECTION_ID = f"{MY_PROJECT_ID}.{LOCATION}.{MY_CONNECTION}"
import gcsfs
import bigframes
import bigframes.pandas as bpd
import bigframes.bigquery as bbq
import json
from IPython.display import HTML, display
session = bpd.get_global_session()
# Configure global display parameters
bigframes.options.display.blob_display_width = 200
def get_runtime_json_str(series, mode="R", with_metadata=False):
s = bbq.obj.fetch_metadata(series) if with_metadata else series
runtime = bbq.obj.get_access_url(s, mode=mode)
return bbq.to_json_string(runtime)
def get_read_url(series):
runtime = bbq.obj.get_access_url(series, mode="R")
return bbq.json_value(runtime, "$.access_urls.read_url")
def render_images(df):
"""Helper to display BigFrames DataFrame with rendered image previews."""
from bigframes import dtypes
if isinstance(df, bpd.Series):
df = df.to_frame()
object_cols = [col for col, dtype in zip(df.columns, df.dtypes) if dtype == dtypes.OBJ_REF_DTYPE]
if not object_cols:
display(df)
return
limit = bigframes.options.display.max_rows or 10
view_df = df.head(limit)
runtime_cols = {
col: get_runtime_json_str(view_df[col], mode="R", with_metadata=False)
for col in object_cols
}
pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()
final_pd = view_df.to_pandas()
width = bigframes.options.display.blob_display_width or 200
def format_cell_html(raw_json):
if not raw_json: return ""
try:
obj_rt = json.loads(raw_json)
if "access_urls" not in obj_rt: return "Error fetching URL"
uri = obj_rt.get("objectref", {}).get("uri", "")
url = obj_rt["access_urls"]["read_url"]
if str(uri).lower().endswith((".png", ".jpg", ".jpeg", ".webp")):
return f'<img src="{url}" width="{width}">'
return f'<a href="{url}" target="_blank">{uri}</a>'
except: return "Format Error"
for col in object_cols:
final_pd[col] = pandas_json_df[col].map(format_cell_html)
display(HTML(final_pd.to_html(escape=False)))
# List files using gcsfs
fs = gcsfs.GCSFileSystem(anon=True)
uris = fs.glob("gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*")
# Ensure URIs have gs:// prefix
uris = [u if u.startswith("gs://") else f"gs://{u}" for u in uris]
# Read the URIs into a BigQuery DataFrame
movies = bpd.read_gbq(f"SELECT uri FROM UNNEST({uris[:5]}) as uri")
# Create the object reference column using the fully qualified connection ID
movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=FULL_CONNECTION_ID)
movies = movies[['poster']]
render_images(movies.head(1))
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
| poster | |
|---|---|
| 0 | ![]() |
Extract titles from posters#
import bigframes.bigquery as bbq
movies['title'] = bbq.ai.generate(
("What is the movie title for this poster image?", get_read_url(movies['poster']))
).struct.field("result")
render_images(movies.head(1))
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
| poster | title | |
|---|---|---|
| 0 | ![]() |
The movie title for this poster image is **Au Secours!** (Help!). |
Notice that ai.generate() has a struct return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, "result" will be the default name. You can access LLM response content with the struct accessor (e.g. my_response.struct.filed("result"));.
Get movie release year#
In the example below, you will use ai.generate_int() to find the release year for each movie poster:
movies['year'] = bbq.ai.generate_int(
("What is the release year for this movie?", movies['title']),
endpoint='gemini-2.5-pro'
).struct.field("result")
movies.head(1)
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.
return prop(*args, **kwargs)
| poster | title | year | |
|---|---|---|---|
| 0 | ![]() |
The movie title is **Au Secours!** | 1924 |
1 rows × 3 columns
movies.dtypes
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
poster struct<uri: string, version: string, authorize...
title string[pyarrow]
year Int64
dtype: object
Filter movie by production country#
In the next example, you will use ai.if_() to find the movies that were produced in the USA.
us_movies = movies[bbq.ai.if_(
("The movie ", movies['title'], " was made in US")
)]
render_images(us_movies.head(1))
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
SQL
WITH `bfcte_0` AS (
SELECT
*
FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(
'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/au_secours.jpeg',
0,
0
), STRUCT(
'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/barque_sortant_du_port.jpeg',
1,
1
), STRUCT(
'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/battling_butler.jpg',
2,
2
), STRUCT(
'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/brown_of_harvard.jpeg',
3,
3
), STRUCT(
'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/der_student_von_prag.jpg',
4,
4
)])
)
SELECT
`bfcol_1` AS `bfuid_col_60`,
TO_JSON_STRING(
OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R')
) AS `bfuid_col_66`
FROM `bfcte_0`
WHERE
AI.IF(
prompt => (
'The movie ',
AI.GENERATE(
prompt => (
'What is the movie title for this poster image?',
JSON_VALUE(
OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R'),
'$.access_urls.read_url'
)
),
request_type => 'UNSPECIFIED'
).`result`,
' was made in US'
),
optimization_mode => 'MINIMIZE_COST'
)
ORDER BY
`bfcol_2` ASC NULLS LAST
LIMIT 1| poster | title | year | |
|---|---|---|---|
| 2 | The movie title for the poster image is **Battling Butler**. | 1926 |
Run in Colab


