# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Analyzing movie posters with BigQuery Dataframe AI functions#

Colab logo Run in Colab GitHub logo View on GitHub BQ logo Open in BQ Studio

BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old movie posters. These posters are images stored in a public Google Cloud Storage bucket: gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters

Set up#

Before you begin, you need to

Once you have the permissions set up, import the bigframes.pandas package, and set your cloud project ID.

import bigframes.pandas as bpd

MY_PROJECT_ID = "bigframes-dev" # @param {type:"string"}
LOCATION = "us" # @param {type:"string"}

bpd.options.bigquery.project = MY_PROJECT_ID
bpd.options.bigquery.location = LOCATION

Load data#

First, you load the data from the GCS bucket to a BigQuery Dataframe:

# Replace with your own connection name.
MY_CONNECTION = 'bigframes-default-connection' # @param {type:"string"}
FULL_CONNECTION_ID = f"{MY_PROJECT_ID}.{LOCATION}.{MY_CONNECTION}"

import gcsfs
import bigframes
import bigframes.pandas as bpd
import bigframes.bigquery as bbq
import json
from IPython.display import HTML, display

session = bpd.get_global_session()

# Configure global display parameters 
bigframes.options.display.blob_display_width = 200

def get_runtime_json_str(series, mode="R", with_metadata=False):
    s = bbq.obj.fetch_metadata(series) if with_metadata else series
    runtime = bbq.obj.get_access_url(s, mode=mode)
    return bbq.to_json_string(runtime)

def get_read_url(series):
    runtime = bbq.obj.get_access_url(series, mode="R")
    return bbq.json_value(runtime, "$.access_urls.read_url")

def render_images(df):
    """Helper to display BigFrames DataFrame with rendered image previews."""
    from bigframes import dtypes
    if isinstance(df, bpd.Series):
        df = df.to_frame()
    
    object_cols = [col for col, dtype in zip(df.columns, df.dtypes) if dtype == dtypes.OBJ_REF_DTYPE]
    if not object_cols:
        display(df)
        return

    limit = bigframes.options.display.max_rows or 10
    view_df = df.head(limit)
    runtime_cols = {
        col: get_runtime_json_str(view_df[col], mode="R", with_metadata=False) 
        for col in object_cols
    }
    
    pandas_json_df = bpd.DataFrame(runtime_cols).to_pandas()
    final_pd = view_df.to_pandas()
    width = bigframes.options.display.blob_display_width or 200
    
    def format_cell_html(raw_json):
        if not raw_json: return ""
        try:
            obj_rt = json.loads(raw_json)
            if "access_urls" not in obj_rt: return "Error fetching URL"
            uri = obj_rt.get("objectref", {}).get("uri", "")
            url = obj_rt["access_urls"]["read_url"]
            if str(uri).lower().endswith((".png", ".jpg", ".jpeg", ".webp")):
                return f'<img src="{url}" width="{width}">'
            return f'<a href="{url}" target="_blank">{uri}</a>'
        except: return "Format Error"

    for col in object_cols:
        final_pd[col] = pandas_json_df[col].map(format_cell_html)
    display(HTML(final_pd.to_html(escape=False)))

# List files using gcsfs
fs = gcsfs.GCSFileSystem(anon=True)
uris = fs.glob("gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*")

# Ensure URIs have gs:// prefix
uris = [u if u.startswith("gs://") else f"gs://{u}" for u in uris]

# Read the URIs into a BigQuery DataFrame
movies = bpd.read_gbq(f"SELECT uri FROM UNNEST({uris[:5]}) as uri")

# Create the object reference column using the fully qualified connection ID
movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=FULL_CONNECTION_ID)
movies = movies[['poster']]
render_images(movies.head(1))
Query processed 0 Bytes in a moment of slot time.
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
Query processed 0 Bytes in 18 seconds of slot time.
Query processed 0 Bytes in 8 seconds of slot time.
poster
0

Extract titles from posters#

import bigframes.bigquery as bbq

movies['title'] = bbq.ai.generate(
    ("What is the movie title for this poster image?", get_read_url(movies['poster']))
).struct.field("result")
render_images(movies.head(1))
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
Query processed 0 Bytes in 23 seconds of slot time. [Job bigframes-dev:US.job_ZKfuxLQE1U49whg7fgakYFYfiz34 details]
Query processed 0 Bytes in 40 seconds of slot time. [Job bigframes-dev:US.job_VwLv_BxDFdE4adNx1bpnvvM5vfZd details]
poster title
0 The movie title for this poster image is **Au Secours!** (Help!).

Notice that ai.generate() has a struct return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, "result" will be the default name. You can access LLM response content with the struct accessor (e.g. my_response.struct.filed("result"));.

Get movie release year#

In the example below, you will use ai.generate_int() to find the release year for each movie poster:

movies['year'] = bbq.ai.generate_int(
    ("What is the release year for this movie?", movies['title']),
    endpoint='gemini-2.5-pro'
).struct.field("result")

movies.head(1)
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.
  return prop(*args, **kwargs)
Query processed 0 Bytes in 51 seconds of slot time. [Job bigframes-dev:US.3cf4ab5b-c360-4b7c-9def-4cd03135a547 details]
Query processed 1.2 kB in a moment of slot time.
poster title year
0 The movie title is **Au Secours!** 1924

1 rows × 3 columns

[1 rows x 3 columns in total]
movies.dtypes
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
poster    struct<uri: string, version: string, authorize...
title                                       string[pyarrow]
year                                                  Int64
dtype: object

Filter movie by production country#

In the next example, you will use ai.if_() to find the movies that were produced in the USA.

us_movies = movies[bbq.ai.if_(
    ("The movie ", movies['title'], " was made in US")
)]
render_images(us_movies.head(1))
/usr/local/google/home/shuowei/src/google-cloud-python/google-cloud-python/packages/bigframes/bigframes/dtypes.py:1044: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())
instead of using `db_dtypes` in the future when available in pandas
(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.
  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)
Query started with request ID bigframes-dev:US.226dcefe-0dc2-4098-a44e-812f8f96b139.
SQL
WITH `bfcte_0` AS (
  SELECT
    *
  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` STRING, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/au_secours.jpeg',
    0,
    0
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/barque_sortant_du_port.jpeg',
    1,
    1
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/battling_butler.jpg',
    2,
    2
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/brown_of_harvard.jpeg',
    3,
    3
  ), STRUCT(
    'gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/der_student_von_prag.jpg',
    4,
    4
  )])
)
SELECT
  `bfcol_1` AS `bfuid_col_60`,
  TO_JSON_STRING(
    OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R')
  ) AS `bfuid_col_66`
FROM `bfcte_0`
WHERE
  AI.IF(
    prompt => (
      'The movie ',
      AI.GENERATE(
        prompt => (
          'What is the movie title for this poster image?',
          JSON_VALUE(
            OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`bfcol_0`, 'bigframes-dev.us.bigframes-default-connection'), 'R'),
            '$.access_urls.read_url'
          )
        ),
        request_type => 'UNSPECIFIED'
      ).`result`,
      ' was made in US'
    ),
    optimization_mode => 'MINIMIZE_COST'
  )
ORDER BY
  `bfcol_2` ASC NULLS LAST
LIMIT 1
Query processed 0 Bytes in 3 minutes of slot time. [Job bigframes-dev:US.job_NBILG5qU14Aitas81nPCCtYM9KdM details]
poster title year
2 The movie title for the poster image is **Battling Butler**. 1926