# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Analyzing movie posters with BigQuery Dataframe AI functions#

Run in Colab

View on GitHub

Open in BQ Studio

BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old movie posters. These posters are images stored in a public Google Cloud Storage bucket: gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters

Set up#

Before you begin, you need to

Set up your permissions for generative AI functions with these instructions
Set up your Cloud Resource connection by following these instructions

Once you have the permissions set up, import the bigframes.pandas package, and set your cloud project ID.

import bigframes.pandas as bpd

MY_PROJECT_ID = "bigframes-dev" # @param {type:"string"}
LOCATION = "us" # @param {type:"string"}

bpd.options.bigquery.project = MY_PROJECT_ID
bpd.options.bigquery.location = LOCATION

Load data#

First, you load the data from the GCS bucket to a BigQuery Dataframe:

# Replace with your own connection name.
MY_CONNECTION = 'bigframes-default-connection' # @param {type:"string"}
FULL_CONNECTION_ID = f"{MY_PROJECT_ID}.{LOCATION}.{MY_CONNECTION}"

import gcsfs
import bigframes
import bigframes.pandas as bpd
import bigframes.bigquery as bbq
import json
from IPython.display import HTML, display

session = bpd.get_global_session()

# Configure global display parameters 
bigframes.options.display.blob_display_width = 200

def get_runtime_json_str(series, mode="R", with_metadata=False):
    s = bbq.obj.fetch_metadata(series) if with_metadata else series
    runtime = bbq.obj.get_access_url(s, mode=mode)
    return bbq.to_json_string(runtime)

def get_read_url(series):
    runtime = bbq.obj.get_access_url(series, mode="R")
    return bbq.json_value(runtime, "$.access_urls.read_url")

def render_images(df):
    """Helper to display BigFrames DataFrame with rendered image previews."""
    from bigframes import dtypes
    if isinstance(df, bpd.Series):
        df = df.to_frame()
    
    object_cols = [col for col, dtype in zip(df.columns, df.dtypes) if dtype == dtypes.OBJ_REF_DTYPE]
    if not object_cols:
        display(df)
        return

    limit = bigframes.options.display.max_rows or 10
    view_df = df.head(limit)
    runtime_cols = {
        col: get_runtime_json_str(view_df[col], mode="R", with_metadata=False) 
        for col in object_cols
    }
    
    pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()
    final_pd = view_df.to_pandas()
    width = bigframes.options.display.blob_display_width or 200
    
    def format_cell_html(raw_json):
        if not raw_json: return ""
        try:
            obj_rt = json.loads(raw_json)
            if "access_urls" not in obj_rt: return "Error fetching URL"
            uri = obj_rt.get("objectref", {}).get("uri", "")
            url = obj_rt["access_urls"]["read_url"]
            if str(uri).lower().endswith((".png", ".jpg", ".jpeg", ".webp")):
                return f'<img src="{url}" width="{width}">'
            return f'<a href="{url}" target="_blank">{uri}</a>'
        except: return "Format Error"

    for col in object_cols:
        final_pd[col] = pandas_json_df[col].map(format_cell_html)
    display(HTML(final_pd.to_html(escape=False)))

# List files using gcsfs
fs = gcsfs.GCSFileSystem(anon=True)
uris = fs.glob("gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*")

# Ensure URIs have gs:// prefix
uris = [u if u.startswith("gs://") else f"gs://{u}" for u in uris]

# Read the URIs into a BigQuery DataFrame
movies = bpd.read_gbq(f"SELECT uri FROM UNNEST({uris[:5]}) as uri")

# Create the object reference column using the fully qualified connection ID
movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=FULL_CONNECTION_ID)
movies = movies[['poster']]
render_images(movies.head(1))

Query processed 0 Bytes in a moment of slot time.

/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)

Query processed 0 Bytes in 18 seconds of slot time.

Query processed 0 Bytes in 8 seconds of slot time.

	poster
0

Extract titles from posters#

import bigframes.bigquery as bbq

movies['title'] = bbq.ai.generate(
    ("What is the movie title for this poster image?", get_read_url(movies['poster']))
).struct.field("result")
render_images(movies.head(1))

/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)

Query processed 0 Bytes in 23 seconds of slot time. [Job bigframes-dev:US.job_ZKfuxLQE1U49whg7fgakYFYfiz34 details]

Query processed 0 Bytes in 40 seconds of slot time. [Job bigframes-dev:US.job_VwLv_BxDFdE4adNx1bpnvvM5vfZd details]

	poster	title
0		The movie title for this poster image is Au Secours! (Help!).

Notice that ai.generate() has a struct return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, "result" will be the default name. You can access LLM response content with the struct accessor (e.g. my_response.struct.filed("result"));.

Get movie release year#

In the example below, you will use ai.generate_int() to find the release year for each movie poster:

movies['year'] = bbq.ai.generate_int(
    ("What is the release year for this movie?", movies['title']),
    endpoint='gemini-2.5-pro'
).struct.field("result")

movies.head(1)

/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.
  return prop(*args, **kwargs)

Query processed 0 Bytes in 51 seconds of slot time. [Job bigframes-dev:US.3cf4ab5b-c360-4b7c-9def-4cd03135a547 details]

Query processed 1.2 kB in a moment of slot time.

	poster	title	year
0		The movie title is Au Secours!	1924

1 rows × 3 columns

[1 rows x 3 columns in total]

movies.dtypes

/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)

poster    struct<uri: string, version: string, authorize...
title                                       string[pyarrow]
year                                                  Int64
dtype: object

Filter movie by production country#

In the next example, you will use ai.if_() to find the movies that were produced in the USA.

us_movies = movies[bbq.ai.if_(
    ("The movie ", movies['title'], " was made in US")
)]
render_images(us_movies.head(1))

/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)

Query started with request ID bigframes-dev:US.226dcefe-0dc2-4098-a44e-812f8f96b139.

SQL

WITH `bfcte_0` AS (
  SELECT
    *
  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/au_secours.jpeg',
    0,
    0
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/barque_sortant_du_port.jpeg',
    1,
    1
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/battling_butler.jpg',
    2,
    2
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/brown_of_harvard.jpeg',
    3,
    3
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/der_student_von_prag.jpg',
    4,
    4
  )])
)
SELECT
  `bfcol_1` AS `bfuid_col_60`,
  TO_JSON_STRING(
    OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R')
  ) AS `bfuid_col_66`
FROM `bfcte_0`
WHERE
  AI.IF(
    prompt => (
      'The movie ',
      AI.GENERATE(
        prompt => (
          'What is the movie title for this poster image?',
          JSON_VALUE(
            OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R'),
            '$.access_urls.read_url'
          )
        ),
        request_type => 'UNSPECIFIED'
      ).`result`,
      ' was made in US'
    ),
    optimization_mode => 'MINIMIZE_COST'
  )
ORDER BY
  `bfcol_2` ASC NULLS LAST
LIMIT 1

Query processed 0 Bytes in 3 minutes of slot time. [Job bigframes-dev:US.job_NBILG5qU14Aitas81nPCCtYM9KdM details]

	poster	title	year
2		The movie title for the poster image is Battling Butler.	1926