# Source code for bigframes.bigquery._operations.search

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import json
import typing
from typing import Collection, Literal, Mapping, Optional, Union

import google.cloud.bigquery as bigquery

import bigframes.ml.utils as utils

if typing.TYPE_CHECKING:
    import bigframes.dataframe as dataframe
    import bigframes.series as series
    import bigframes.session

"""
Search functions defined from
https://cloud.google.com/bigquery/docs/reference/standard-sql/search_functions
"""


def create_vector_index(
    table_id: str,
    column_name: str,
    *,
    replace: bool = False,
    index_name: Optional[str] = None,
    distance_type: str = "cosine",
    stored_column_names: Collection[str] = (),
    index_type: str = "ivf",
    ivf_options: Optional[Mapping] = None,
    tree_ah_options: Optional[Mapping] = None,
    session: Optional[bigframes.session.Session] = None,
) -> None:
    """
    Creates a new vector index on a column of a table.

    This method calls the `CREATE VECTOR INDEX DDL statement
    <https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_vector_index_statement>`_.

    Args:
        table_id (str):
            Identifier of the table that holds the column to index. It is
            passed to the DDL builder as the table name and, when
            ``index_name`` is omitted, parsed with
            ``google.cloud.bigquery.TableReference.from_string``.
        column_name (str):
            Name of the column to build the index on.
        replace (bool, default False):
            Forwarded to the DDL builder as its ``replace`` flag.
        index_name (str, optional):
            Name of the index. Defaults to the table name parsed from
            ``table_id``.
        distance_type (str, default "cosine"):
            Distance measure for the index. Upper-cased before being placed
            in the index options.
        stored_column_names (Collection[str], default ()):
            Column names forwarded to the DDL builder as the stored columns.
        index_type (str, default "ivf"):
            Kind of index to build. Upper-cased before being placed in the
            index options.
        ivf_options (Mapping, optional):
            If given, serialized to a JSON string and sent as the
            ``ivf_options`` index option.
        tree_ah_options (Mapping, optional):
            If given, serialized to a JSON string and sent as the
            ``tree_ah_options`` index option.
        session (bigframes.session.Session, optional):
            Session used to run the statement. When omitted, the global
            ``bigframes.pandas.read_gbq_query`` is used instead.

    Returns:
        None
    """
    # Imported locally, as in the original. ``bigframes.core.sql`` is imported
    # explicitly so the attribute lookup below does not depend on some other
    # module having loaded that submodule as a side effect.
    import bigframes.core.sql
    import bigframes.pandas

    if index_name is None:
        # Fall back to naming the index after the table itself.
        table_ref = bigquery.TableReference.from_string(table_id)
        index_name = table_ref.table_id

    options = {
        "index_type": index_type.upper(),
        "distance_type": distance_type.upper(),
    }

    # Nested option mappings are sent as JSON-encoded strings.
    if ivf_options is not None:
        options["ivf_options"] = json.dumps(ivf_options)

    if tree_ah_options is not None:
        options["tree_ah_options"] = json.dumps(tree_ah_options)

    sql = bigframes.core.sql.create_vector_index_ddl(
        replace=replace,
        index_name=index_name,
        table_name=table_id,
        column_name=column_name,
        stored_column_names=stored_column_names,
        options=options,
    )

    # Use global read_gbq to execute this for better location autodetection.
    if session is None:
        read_gbq_query = bigframes.pandas.read_gbq_query
    else:
        read_gbq_query = session.read_gbq_query

    read_gbq_query(sql)