{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "eeec3428", "metadata": {}, "outputs": [], "source": [ "# Copyright 2023 Google LLC\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "attachments": {}, "cell_type": "markdown", "id": "47439dbd-4e54-4954-8b16-edc4bcd4f855", "metadata": {}, "source": [ "# Operations with an \"NULL index\" DataFrame\n", "\n", "**Note**: This notebook describes a feature that is currently in [preview](https://cloud.google.com/blog/products/gcp/google-cloud-gets-simplified-product-launch-stages). There may be breaking changes to the functionality when using \"NULL index\" objects.\n", "\n", "Use the \"NULL\" index for more efficient query generation, but\n", "some pandas-compatible methods may not be possible without an index." ] }, { "cell_type": "code", "execution_count": null, "id": "96757c59-fc22-420e-a42f-c6cb956110ec", "metadata": {}, "outputs": [], "source": [ "import bigframes.enums\n", "import bigframes.exceptions\n", "import bigframes.pandas as bpd\n", "\n", "df = bpd.read_gbq(\n", " \"bigquery-public-data.baseball.schedules\",\n", " index_col=bigframes.enums.DefaultIndexKind.NULL,\n", ")" ] }, { "cell_type": "markdown", "id": "d15688e1", "metadata": {}, "source": [ "Use `peek()` to view an arbitrary selection of rows from the DataFrame. This is much more efficient than `head()`, which requires a total ordering for determinism." ] }, { "cell_type": "code", "execution_count": 3, "id": "c93949fb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 1b8726ce-c4ea-47fe-a47c-d6fae50d8fb0 is DONE. 582.8 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdgameNumberseasonIdyeartypedayNightdurationduration_minuteshomeTeamIdhomeTeamNameawayTeamIdawayTeamNamestartTimeattendancestatuscreated
0e14b6493-9e7f-404f-840a-8a680cc364bf1565de4be-dc80-4849-a7e1-54bc79156cc82016REGD3:0718703556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-26 17:10:00+00:0027318closed2016-10-06 06:25:15+00:00
11f32b347-cbcb-4c31-a145-0e685306d1681565de4be-dc80-4849-a7e1-54bc79156cc82016REGD3:0918903556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-25 20:10:00+00:0029457closed2016-10-06 06:25:15+00:00
20c2292d1-7398-48be-bf8e-b41dad5e1a431565de4be-dc80-4849-a7e1-54bc79156cc82016REGD2:4516512079497-e414-450a-8bf2-29f91de646bfBraves55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-11 20:10:00+00:0043114closed2016-10-06 06:25:15+00:00
38fbec734-a15a-42ab-8d51-60790de7750b1565de4be-dc80-4849-a7e1-54bc79156cc82016REGD3:4222212079497-e414-450a-8bf2-29f91de646bfBraves55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-12 17:35:00+00:0031625closed2016-10-06 06:25:15+00:00
489e514d5-fbf5-4b9d-bdac-6ca45bfd18dd1565de4be-dc80-4849-a7e1-54bc79156cc82016REGD2:441642142e1ba-3b40-445c-b8bb-f1f8b1054220Phillies55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-08 17:05:00+00:0028650closed2016-10-06 06:25:15+00:00
\n", "
" ], "text/plain": [ " gameId gameNumber \\\n", "0 e14b6493-9e7f-404f-840a-8a680cc364bf 1 \n", "1 1f32b347-cbcb-4c31-a145-0e685306d168 1 \n", "2 0c2292d1-7398-48be-bf8e-b41dad5e1a43 1 \n", "3 8fbec734-a15a-42ab-8d51-60790de7750b 1 \n", "4 89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd 1 \n", "\n", " seasonId year type dayNight duration \\\n", "0 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 3:07 \n", "1 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 3:09 \n", "2 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 2:45 \n", "3 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 3:42 \n", "4 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG D 2:44 \n", "\n", " duration_minutes homeTeamId homeTeamName \\\n", "0 187 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", "1 189 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", "2 165 12079497-e414-450a-8bf2-29f91de646bf Braves \n", "3 222 12079497-e414-450a-8bf2-29f91de646bf Braves \n", "4 164 2142e1ba-3b40-445c-b8bb-f1f8b1054220 Phillies \n", "\n", " awayTeamId awayTeamName \\\n", "0 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "1 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "2 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "3 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "4 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "\n", " startTime attendance status created \n", "0 2016-06-26 17:10:00+00:00 27318 closed 2016-10-06 06:25:15+00:00 \n", "1 2016-06-25 20:10:00+00:00 29457 closed 2016-10-06 06:25:15+00:00 \n", "2 2016-06-11 20:10:00+00:00 43114 closed 2016-10-06 06:25:15+00:00 \n", "3 2016-06-12 17:35:00+00:00 31625 closed 2016-10-06 06:25:15+00:00 \n", "4 2016-06-08 17:05:00+00:00 28650 closed 2016-10-06 06:25:15+00:00 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.peek()" ] }, { "cell_type": "markdown", "id": "78e3d27d", "metadata": {}, "source": [ "# Inspect the properties of the DataFrame\n", "\n", "Some properties, such as `dtypes`, can be retrieved without executing a query job." ] }, { "cell_type": "code", "execution_count": 4, "id": "38f566c5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "gameId string[pyarrow]\n", "gameNumber Int64\n", "seasonId string[pyarrow]\n", "year Int64\n", "type string[pyarrow]\n", "dayNight string[pyarrow]\n", "duration string[pyarrow]\n", "duration_minutes Int64\n", "homeTeamId string[pyarrow]\n", "homeTeamName string[pyarrow]\n", "awayTeamId string[pyarrow]\n", "awayTeamName string[pyarrow]\n", "startTime timestamp[us, tz=UTC][pyarrow]\n", "attendance Int64\n", "status string[pyarrow]\n", "created timestamp[us, tz=UTC][pyarrow]\n", "dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "markdown", "id": "38a59ecc", "metadata": {}, "source": [ "Other properties, such as `shape` require a query. In this case, `shape` runs a `COUNT(1)` query." ] }, { "cell_type": "code", "execution_count": 5, "id": "e3b43d37", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 0f85f12c-227c-4001-b851-6e9b9087ab7e is DONE. 0 Bytes processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "(2431, 16)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "attachments": {}, "cell_type": "markdown", "id": "13861abc-120c-4db6-ad0c-e414b85d3443", "metadata": {}, "source": [ "## Select a subset of the DataFrame", "\n", "Filter columns by selecting a list of columns from the DataFrame.\n", "\n", "**Note**: Even with `index_col=bigframes.enums.DefaultIndexKind.NULL`, it is more efficient to do this selection in `read_gbq` / `read_gbq_table` except in cases where the total ordering ID columns can be pruned." ] }, { "cell_type": "code", "execution_count": 6, "id": "05cb36e9-bb75-4f6f-8eb6-e4219df6e1d2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job efa6b4be-cf60-4951-9125-7d77fb6b6b44 is DONE. 174.4 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdyearhomeTeamNameawayTeamNameduration_minutes
0e14b6493-9e7f-404f-840a-8a680cc364bf2016MarlinsCubs187
11f32b347-cbcb-4c31-a145-0e685306d1682016MarlinsCubs189
20c2292d1-7398-48be-bf8e-b41dad5e1a432016BravesCubs165
38fbec734-a15a-42ab-8d51-60790de7750b2016BravesCubs222
489e514d5-fbf5-4b9d-bdac-6ca45bfd18dd2016PhilliesCubs164
\n", "
" ], "text/plain": [ " gameId year homeTeamName awayTeamName \\\n", "0 e14b6493-9e7f-404f-840a-8a680cc364bf 2016 Marlins Cubs \n", "1 1f32b347-cbcb-4c31-a145-0e685306d168 2016 Marlins Cubs \n", "2 0c2292d1-7398-48be-bf8e-b41dad5e1a43 2016 Braves Cubs \n", "3 8fbec734-a15a-42ab-8d51-60790de7750b 2016 Braves Cubs \n", "4 89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd 2016 Phillies Cubs \n", "\n", " duration_minutes \n", "0 187 \n", "1 189 \n", "2 165 \n", "3 222 \n", "4 164 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "column_filtered = df[[\"gameId\", \"year\", \"homeTeamName\", \"awayTeamName\", \"duration_minutes\"]]\n", "column_filtered.peek()" ] }, { "cell_type": "markdown", "id": "d4d52c41", "metadata": {}, "source": [ "Filter by rows using a boolean Series. This Series must be derived from the DataFrame being filtered so that the NULL index can still align correctly." ] }, { "cell_type": "code", "execution_count": 7, "id": "a6b8b3ac-1df8-46ff-ac4f-d6e7657fc80c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 0be8e44d-854a-45ca-950b-269280e3de41 is DONE. 582.8 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdgameNumberseasonIdyeartypedayNightdurationduration_minuteshomeTeamIdhomeTeamNameawayTeamIdawayTeamNamestartTimeattendancestatuscreated
063f14670-c28e-432b-84ee-1a2c6ac295271565de4be-dc80-4849-a7e1-54bc79156cc82016REGN2:4316303556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-23 23:10:00+00:0025291closed2016-10-06 06:25:15+00:00
1bf4e80d1-3125-44fa-8a89-de93d039d4651565de4be-dc80-4849-a7e1-54bc79156cc82016REGN3:2420403556285-bdbb-4576-a06d-42f71f46ddc5Marlins55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-24 23:10:00+00:0024385closed2016-10-06 06:25:15+00:00
2e8af534c-36ed-4ff9-8511-780825fdd0411565de4be-dc80-4849-a7e1-54bc79156cc82016REGN2:5117112079497-e414-450a-8bf2-29f91de646bfBraves55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-10 23:35:00+00:0030547closed2016-10-06 06:25:15+00:00
3e599c525-ac42-4b54-928d-7ee5fbe67dd91565de4be-dc80-4849-a7e1-54bc79156cc82016REGN2:451652142e1ba-3b40-445c-b8bb-f1f8b1054220Phillies55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-07 23:05:00+00:0027381closed2016-10-06 06:25:15+00:00
4d80ffb65-57a4-42c9-ae1c-2c51d06503361565de4be-dc80-4849-a7e1-54bc79156cc82016REGN3:051852142e1ba-3b40-445c-b8bb-f1f8b1054220Phillies55714da8-fcaf-4574-8443-59bfb511a524Cubs2016-06-06 23:05:00+00:0022162closed2016-10-06 06:25:15+00:00
\n", "
" ], "text/plain": [ " gameId gameNumber \\\n", "0 63f14670-c28e-432b-84ee-1a2c6ac29527 1 \n", "1 bf4e80d1-3125-44fa-8a89-de93d039d465 1 \n", "2 e8af534c-36ed-4ff9-8511-780825fdd041 1 \n", "3 e599c525-ac42-4b54-928d-7ee5fbe67dd9 1 \n", "4 d80ffb65-57a4-42c9-ae1c-2c51d0650336 1 \n", "\n", " seasonId year type dayNight duration \\\n", "0 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 2:43 \n", "1 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 3:24 \n", "2 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 2:51 \n", "3 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 2:45 \n", "4 565de4be-dc80-4849-a7e1-54bc79156cc8 2016 REG N 3:05 \n", "\n", " duration_minutes homeTeamId homeTeamName \\\n", "0 163 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", "1 204 03556285-bdbb-4576-a06d-42f71f46ddc5 Marlins \n", "2 171 12079497-e414-450a-8bf2-29f91de646bf Braves \n", "3 165 2142e1ba-3b40-445c-b8bb-f1f8b1054220 Phillies \n", "4 185 2142e1ba-3b40-445c-b8bb-f1f8b1054220 Phillies \n", "\n", " awayTeamId awayTeamName \\\n", "0 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "1 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "2 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "3 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "4 55714da8-fcaf-4574-8443-59bfb511a524 Cubs \n", "\n", " startTime attendance status created \n", "0 2016-06-23 23:10:00+00:00 25291 closed 2016-10-06 06:25:15+00:00 \n", "1 2016-06-24 23:10:00+00:00 24385 closed 2016-10-06 06:25:15+00:00 \n", "2 2016-06-10 23:35:00+00:00 30547 closed 2016-10-06 06:25:15+00:00 \n", "3 2016-06-07 23:05:00+00:00 27381 closed 2016-10-06 06:25:15+00:00 \n", "4 2016-06-06 23:05:00+00:00 22162 closed 2016-10-06 06:25:15+00:00 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "night_games = df[df['dayNight'] == 'N']\n", "night_games.peek()" ] }, { "attachments": {}, "cell_type": "markdown", "id": "80e9a2e2-c4c9-4c17-bbd0-06882d7657fe", "metadata": {}, "source": [ "### Join two DataFrames\n", "\n", "Even though pandas usually joins by the index, NULL index objects can still be manually joined by a column using the `on` parameter in `merge`." ] }, { "cell_type": "code", "execution_count": 8, "id": "3f09ff32-ef43-4fab-a86b-8868afc34363", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 5d2c69d2-33fe-4513-923b-fd64f4da098b is DONE. 113.9 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdhomeTeamName
0e14b6493-9e7f-404f-840a-8a680cc364bfMarlins
11f32b347-cbcb-4c31-a145-0e685306d168Marlins
20c2292d1-7398-48be-bf8e-b41dad5e1a43Braves
38fbec734-a15a-42ab-8d51-60790de7750bBraves
489e514d5-fbf5-4b9d-bdac-6ca45bfd18ddPhillies
\n", "
" ], "text/plain": [ " gameId homeTeamName\n", "0 e14b6493-9e7f-404f-840a-8a680cc364bf Marlins\n", "1 1f32b347-cbcb-4c31-a145-0e685306d168 Marlins\n", "2 0c2292d1-7398-48be-bf8e-b41dad5e1a43 Braves\n", "3 8fbec734-a15a-42ab-8d51-60790de7750b Braves\n", "4 89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd Phillies" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = df[[\"gameId\", \"homeTeamName\"]]\n", "df1.peek()" ] }, { "cell_type": "code", "execution_count": 9, "id": "5331d2c8-7912-4d96-8da1-f64b57374df3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job b6b70d6d-a490-44d6-ba74-0ee32b4f0a1a is DONE. 582.8 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Query job 68acd168-8b42-44f8-8702-99618935991e is DONE. 94 Bytes processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdawayTeamName
0af72a0b9-65f7-49fb-9b30-d505068bdf6dBrewers
1d60c6036-0ce1-4c90-8dd9-de3b403c92a8Brewers
\n", "
" ], "text/plain": [ " gameId awayTeamName\n", "0 af72a0b9-65f7-49fb-9b30-d505068bdf6d Brewers\n", "1 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Brewers" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = df[[\"gameId\", \"awayTeamName\"]].head(2)\n", "df2.peek()" ] }, { "cell_type": "code", "execution_count": 10, "id": "a574ad3e-a219-454c-8bb5-c5ed6627f2c6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 0ac171dd-3859-4589-b7ff-59fd81ec3c3a is DONE. 582.9 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Query job 034f8807-c128-444a-8033-0c95f34b0e32 is DONE. 111 Bytes processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdhomeTeamNameawayTeamName
0af72a0b9-65f7-49fb-9b30-d505068bdf6dRedsBrewers
1d60c6036-0ce1-4c90-8dd9-de3b403c92a8NationalsBrewers
\n", "
" ], "text/plain": [ " gameId homeTeamName awayTeamName\n", "0 af72a0b9-65f7-49fb-9b30-d505068bdf6d Reds Brewers\n", "1 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Nationals Brewers" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged = df1.merge(df2, on=\"gameId\", how=\"inner\")\n", "merged.peek()" ] }, { "cell_type": "code", "execution_count": 11, "id": "288e7a95-a077-46c4-8fe6-802474c01f8b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 30fd5a60-772c-4ef0-a151-5ab390ff4322 is DONE. 582.9 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Query job 701fa9a8-1ec6-49b9-ac41-228cb34d4c8c is DONE. 114.0 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdhomeTeamNameawayTeamName
0039bb40e-7613-4674-a653-584b93e9b21bAmerican League<NA>
178000e12-2ef3-4246-adc1-c8a4d157631cAngels<NA>
2de5555dc-9228-4f7c-88ae-4451e3ffb980Angels<NA>
3f29a2754-004b-436c-91fe-3d86c0bb17a8Angels<NA>
48e5af008-8a07-4f9a-90cb-336ca4c84c71Angels<NA>
\n", "
" ], "text/plain": [ " gameId homeTeamName awayTeamName\n", "0 039bb40e-7613-4674-a653-584b93e9b21b American League \n", "1 78000e12-2ef3-4246-adc1-c8a4d157631c Angels \n", "2 de5555dc-9228-4f7c-88ae-4451e3ffb980 Angels \n", "3 f29a2754-004b-436c-91fe-3d86c0bb17a8 Angels \n", "4 8e5af008-8a07-4f9a-90cb-336ca4c84c71 Angels " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged = df1.merge(df2, on=\"gameId\", how=\"outer\")\n", "merged.peek()" ] }, { "cell_type": "code", "execution_count": 12, "id": "7ee87a01-2ff5-4021-855d-44b71cf2a225", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job e3d8168c-48e9-4ba9-a916-10259ad9c0ea is DONE. 582.9 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Query job 82d2a5e4-66a8-4478-92de-57d3f806aa76 is DONE. 114.0 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdhomeTeamNameawayTeamName
0039bb40e-7613-4674-a653-584b93e9b21bAmerican League<NA>
1f6fcd83c-e130-487c-a0cc-d00b2712d08bAngels<NA>
2fe401dd2-089c-4822-8657-4d510d460f38Angels<NA>
3c894bdee-5dda-49f4-87c8-53b9b9bfcd3bAngels<NA>
4bbda59d9-fd52-4bed-bcfb-2ceed4be997cAngels<NA>
\n", "
" ], "text/plain": [ " gameId homeTeamName awayTeamName\n", "0 039bb40e-7613-4674-a653-584b93e9b21b American League \n", "1 f6fcd83c-e130-487c-a0cc-d00b2712d08b Angels \n", "2 fe401dd2-089c-4822-8657-4d510d460f38 Angels \n", "3 c894bdee-5dda-49f4-87c8-53b9b9bfcd3b Angels \n", "4 bbda59d9-fd52-4bed-bcfb-2ceed4be997c Angels " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged = df1.merge(df2, on=\"gameId\", how=\"left\")\n", "merged.peek()" ] }, { "cell_type": "code", "execution_count": 13, "id": "330ed69c-f122-4af9-bf5e-96e309d3fa0c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 518ed511-606a-42b2-a28d-61a601eccfa7 is DONE. 582.9 kB processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Query job bc381640-74e0-4885-9c32-87805a49f357 is DONE. 111 Bytes processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdhomeTeamNameawayTeamName
0af72a0b9-65f7-49fb-9b30-d505068bdf6dRedsBrewers
1d60c6036-0ce1-4c90-8dd9-de3b403c92a8NationalsBrewers
\n", "
" ], "text/plain": [ " gameId homeTeamName awayTeamName\n", "0 af72a0b9-65f7-49fb-9b30-d505068bdf6d Reds Brewers\n", "1 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Nationals Brewers" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged = df1.merge(df2, on=\"gameId\", how=\"right\")\n", "merged.peek()" ] }, { "cell_type": "markdown", "id": "162eede7", "metadata": {}, "source": [ "### Download the result as (in-memory) pandas DataFrame\n", "\n", "Use the `ordered=False` argument for more efficient query execution." ] }, { "cell_type": "code", "execution_count": 17, "id": "ab429fa5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "Query job 2d4fbd55-ba6a-46d2-87ae-5da416ad3642 is DONE. 159 Bytes processed. Open Job" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gameIdhomeTeamNameawayTeamName
0d60c6036-0ce1-4c90-8dd9-de3b403c92a8NationalsBrewers
1af72a0b9-65f7-49fb-9b30-d505068bdf6dRedsBrewers
\n", "
" ], "text/plain": [ " gameId homeTeamName awayTeamName\n", "0 d60c6036-0ce1-4c90-8dd9-de3b403c92a8 Nationals Brewers\n", "1 af72a0b9-65f7-49fb-9b30-d505068bdf6d Reds Brewers" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfp = merged.to_pandas(ordered=False)\n", "dfp" ] }, { "cell_type": "code", "execution_count": null, "id": "896212ab", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.1" } }, "nbformat": 4, "nbformat_minor": 5 }