{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "ur8xi4C7S06n" }, "outputs": [], "source": [ "# Copyright 2024 Google LLC\n", "#\n", "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "cell_type": "markdown", "metadata": { "id": "JAPoU8Sm5E6e" }, "source": [ "# Machine Learning Fundamentals with BigQuery DataFrames\n", "\n", "
\n",
" \n",
" Run in Colab\n",
" \n",
" | \n",
" \n",
" \n",
" \n",
" View on GitHub\n",
" \n",
" | \n",
" \n",
" \n",
" | \n",
" \n",
" \n",
" | \n",
"
| \n", " | species | \n", "island | \n", "culmen_length_mm | \n", "culmen_depth_mm | \n", "flipper_length_mm | \n", "body_mass_g | \n", "sex | \n", "
|---|---|---|---|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 0 | \n", "Gentoo penguin (Pygoscelis papua) | \n", "Biscoe | \n", "50.5 | \n", "15.9 | \n", "225.0 | \n", "5400.0 | \n", "MALE | \n", "
| 1 | \n", "Gentoo penguin (Pygoscelis papua) | \n", "Biscoe | \n", "45.1 | \n", "14.5 | \n", "215.0 | \n", "5000.0 | \n", "FEMALE | \n", "
| 2 | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "Torgersen | \n", "41.4 | \n", "18.5 | \n", "202.0 | \n", "3875.0 | \n", "MALE | \n", "
| 3 | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "Torgersen | \n", "38.6 | \n", "17.0 | \n", "188.0 | \n", "2900.0 | \n", "FEMALE | \n", "
| 4 | \n", "Gentoo penguin (Pygoscelis papua) | \n", "Biscoe | \n", "46.5 | \n", "14.8 | \n", "217.0 | \n", "5200.0 | \n", "FEMALE | \n", "
5 rows × 7 columns
\n", "| \n", " | island | \n", "culmen_length_mm | \n", "culmen_depth_mm | \n", "flipper_length_mm | \n", "sex | \n", "species | \n", "
|---|---|---|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 188 | \n", "Dream | \n", "51.5 | \n", "18.7 | \n", "187.0 | \n", "MALE | \n", "Chinstrap penguin (Pygoscelis antarctica) | \n", "
| 251 | \n", "Biscoe | \n", "49.5 | \n", "16.1 | \n", "224.0 | \n", "MALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
| 231 | \n", "Biscoe | \n", "45.7 | \n", "13.9 | \n", "214.0 | \n", "FEMALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
| 271 | \n", "Biscoe | \n", "59.6 | \n", "17.0 | \n", "230.0 | \n", "MALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
| 128 | \n", "Biscoe | \n", "38.8 | \n", "17.2 | \n", "180.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
5 rows × 6 columns
\n", "| \n", " | body_mass_g | \n", "
|---|---|
| penguin_id | \n", "\n", " |
| 188 | \n", "3250.0 | \n", "
| 251 | \n", "5650.0 | \n", "
| 231 | \n", "4400.0 | \n", "
| 271 | \n", "6050.0 | \n", "
| 128 | \n", "3800.0 | \n", "
5 rows × 1 columns
\n", "| \n", " | standard_scaled_culmen_length_mm | \n", "standard_scaled_culmen_depth_mm | \n", "standard_scaled_flipper_length_mm | \n", "
|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " |
| 0 | \n", "1.20778 | \n", "-0.651531 | \n", "1.772656 | \n", "
| 2 | \n", "-0.455602 | \n", "0.662855 | \n", "0.100476 | \n", "
| 3 | \n", "-0.967412 | \n", "-0.095445 | \n", "-0.917372 | \n", "
| 4 | \n", "0.476623 | \n", "-1.207617 | \n", "1.191028 | \n", "
| 5 | \n", "-1.625454 | \n", "0.359535 | \n", "-0.626559 | \n", "
| 7 | \n", "-0.345929 | \n", "-1.86481 | \n", "0.682104 | \n", "
| 8 | \n", "0.842202 | \n", "-1.561491 | \n", "1.409139 | \n", "
| 9 | \n", "0.348671 | \n", "0.865068 | \n", "-0.263041 | \n", "
| 10 | \n", "0.933596 | \n", "1.218941 | \n", "0.827511 | \n", "
| 11 | \n", "-1.460943 | \n", "-0.297658 | \n", "-0.771966 | \n", "
| 12 | \n", "1.317454 | \n", "-0.449318 | \n", "1.409139 | \n", "
| 13 | \n", "-0.236255 | \n", "-1.763704 | \n", "0.900214 | \n", "
| 14 | \n", "0.549739 | \n", "-0.297658 | \n", "-0.626559 | \n", "
| 16 | \n", "0.970154 | \n", "-1.005404 | \n", "1.481842 | \n", "
| 17 | \n", "-1.058807 | \n", "-0.348211 | \n", "-0.190338 | \n", "
| 18 | \n", "1.354012 | \n", "-1.510937 | \n", "1.263732 | \n", "
| 19 | \n", "-0.053466 | \n", "-1.662597 | \n", "1.191028 | \n", "
| 20 | \n", "-0.199697 | \n", "-1.510937 | \n", "0.609401 | \n", "
| 21 | \n", "1.152943 | \n", "0.763962 | \n", "-0.190338 | \n", "
| 22 | \n", "-1.205038 | \n", "0.308982 | \n", "-0.699262 | \n", "
| 24 | \n", "-0.784623 | \n", "1.775028 | \n", "-0.699262 | \n", "
| 25 | \n", "-0.83946 | \n", "1.724474 | \n", "-0.771966 | \n", "
| 26 | \n", "-0.620113 | \n", "0.359535 | \n", "-0.990076 | \n", "
| 27 | \n", "0.330392 | \n", "-0.095445 | \n", "-0.408448 | \n", "
| 29 | \n", "2.194842 | \n", "-0.095445 | \n", "1.990767 | \n", "
25 rows × 3 columns
\n", "| \n", " | standard_scaled_culmen_length_mm | \n", "standard_scaled_culmen_depth_mm | \n", "standard_scaled_flipper_length_mm | \n", "
|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " |
| 1 | \n", "0.220718 | \n", "-1.359277 | \n", "1.045621 | \n", "
| 15 | \n", "-0.510439 | \n", "0.157322 | \n", "-0.771966 | \n", "
| 28 | \n", "-1.058807 | \n", "0.713408 | \n", "-0.771966 | \n", "
| 32 | \n", "1.463685 | \n", "1.168388 | \n", "0.39129 | \n", "
| 33 | \n", "-0.254534 | \n", "0.056215 | \n", "-0.990076 | \n", "
| 34 | \n", "-0.510439 | \n", "0.460642 | \n", "0.318587 | \n", "
| 37 | \n", "1.354012 | \n", "0.511195 | \n", "-0.263041 | \n", "
| 41 | \n", "-0.674949 | \n", "-0.095445 | \n", "-1.789814 | \n", "
| 47 | \n", "-1.168481 | \n", "0.662855 | \n", "-0.117634 | \n", "
| 52 | \n", "0.458344 | \n", "0.308982 | \n", "-0.699262 | \n", "
| 56 | \n", "-1.040528 | \n", "0.460642 | \n", "-1.135483 | \n", "
| 57 | \n", "-0.967412 | \n", "0.005662 | \n", "-0.117634 | \n", "
| 62 | \n", "0.988433 | \n", "-0.752638 | \n", "1.191028 | \n", "
| 65 | \n", "1.756148 | \n", "1.370601 | \n", "0.318587 | \n", "
| 67 | \n", "0.677691 | \n", "-1.359277 | \n", "1.045621 | \n", "
| 75 | \n", "-1.113644 | \n", "1.421155 | \n", "-0.771966 | \n", "
| 81 | \n", "0.677691 | \n", "0.561748 | \n", "-0.408448 | \n", "
| 89 | \n", "-0.857739 | \n", "0.713408 | \n", "-0.771966 | \n", "
| 92 | \n", "-0.802902 | \n", "0.308982 | \n", "-0.917372 | \n", "
| 93 | \n", "-0.309371 | \n", "1.168388 | \n", "-0.263041 | \n", "
| 96 | \n", "-0.309371 | \n", "0.662855 | \n", "-1.499 | \n", "
| 100 | \n", "-0.912576 | \n", "0.814515 | \n", "-0.771966 | \n", "
| 101 | \n", "0.549739 | \n", "-1.308724 | \n", "1.554546 | \n", "
| 102 | \n", "-0.126582 | \n", "0.662855 | \n", "-0.626559 | \n", "
| 107 | \n", "1.20778 | \n", "-1.005404 | \n", "1.118325 | \n", "
25 rows × 3 columns
\n", "| \n", " | onehotencoded_island | \n", "standard_scaled_culmen_length_mm | \n", "standard_scaled_culmen_depth_mm | \n", "standard_scaled_flipper_length_mm | \n", "onehotencoded_sex | \n", "onehotencoded_species | \n", "
|---|---|---|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 0 | \n", "[{'index': 1, 'value': 1.0}] | \n", "1.20778 | \n", "-0.651531 | \n", "1.772656 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 2 | \n", "[{'index': 3, 'value': 1.0}] | \n", "-0.455602 | \n", "0.662855 | \n", "0.100476 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 3 | \n", "[{'index': 3, 'value': 1.0}] | \n", "-0.967412 | \n", "-0.095445 | \n", "-0.917372 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 4 | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.476623 | \n", "-1.207617 | \n", "1.191028 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 5 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-1.625454 | \n", "0.359535 | \n", "-0.626559 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 7 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.345929 | \n", "-1.86481 | \n", "0.682104 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 8 | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.842202 | \n", "-1.561491 | \n", "1.409139 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 9 | \n", "[{'index': 3, 'value': 1.0}] | \n", "0.348671 | \n", "0.865068 | \n", "-0.263041 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 10 | \n", "[{'index': 2, 'value': 1.0}] | \n", "0.933596 | \n", "1.218941 | \n", "0.827511 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 11 | \n", "[{'index': 3, 'value': 1.0}] | \n", "-1.460943 | \n", "-0.297658 | \n", "-0.771966 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 12 | \n", "[{'index': 1, 'value': 1.0}] | \n", "1.317454 | \n", "-0.449318 | \n", "1.409139 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 13 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.236255 | \n", "-1.763704 | \n", "0.900214 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 14 | \n", "[{'index': 2, 'value': 1.0}] | \n", "0.549739 | \n", "-0.297658 | \n", "-0.626559 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 16 | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.970154 | \n", "-1.005404 | \n", "1.481842 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 17 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-1.058807 | \n", "-0.348211 | \n", "-0.190338 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 18 | \n", "[{'index': 1, 'value': 1.0}] | \n", "1.354012 | \n", "-1.510937 | \n", "1.263732 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 19 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.053466 | \n", "-1.662597 | \n", "1.191028 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 20 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.199697 | \n", "-1.510937 | \n", "0.609401 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 21 | \n", "[{'index': 2, 'value': 1.0}] | \n", "1.152943 | \n", "0.763962 | \n", "-0.190338 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 22 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-1.205038 | \n", "0.308982 | \n", "-0.699262 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 24 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.784623 | \n", "1.775028 | \n", "-0.699262 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 25 | \n", "[{'index': 3, 'value': 1.0}] | \n", "-0.83946 | \n", "1.724474 | \n", "-0.771966 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 26 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.620113 | \n", "0.359535 | \n", "-0.990076 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 27 | \n", "[{'index': 2, 'value': 1.0}] | \n", "0.330392 | \n", "-0.095445 | \n", "-0.408448 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 29 | \n", "[{'index': 1, 'value': 1.0}] | \n", "2.194842 | \n", "-0.095445 | \n", "1.990767 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
25 rows × 6 columns
\n", "| \n", " | predicted_body_mass_g | \n", "onehotencoded_island | \n", "standard_scaled_culmen_length_mm | \n", "standard_scaled_culmen_depth_mm | \n", "standard_scaled_flipper_length_mm | \n", "onehotencoded_sex | \n", "onehotencoded_species | \n", "
|---|---|---|---|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1 | \n", "4772.376044 | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.220718 | \n", "-1.359277 | \n", "1.045621 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 15 | \n", "3883.373922 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.510439 | \n", "0.157322 | \n", "-0.771966 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 28 | \n", "3479.709088 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-1.058807 | \n", "0.713408 | \n", "-0.771966 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 32 | \n", "4223.853626 | \n", "[{'index': 2, 'value': 1.0}] | \n", "1.463685 | \n", "1.168388 | \n", "0.39129 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 33 | \n", "3197.623474 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.254534 | \n", "0.056215 | \n", "-0.990076 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 34 | \n", "4155.26742 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.510439 | \n", "0.460642 | \n", "0.318587 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 37 | \n", "3991.314095 | \n", "[{'index': 2, 'value': 1.0}] | \n", "1.354012 | \n", "0.511195 | \n", "-0.263041 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 41 | \n", "3232.648242 | \n", "[{'index': 3, 'value': 1.0}] | \n", "-0.674949 | \n", "-0.095445 | \n", "-1.789814 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 47 | \n", "4017.740788 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-1.168481 | \n", "0.662855 | \n", "-0.117634 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 52 | \n", "3365.080596 | \n", "[{'index': 2, 'value': 1.0}] | \n", "0.458344 | \n", "0.308982 | \n", "-0.699262 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 56 | \n", "3791.332002 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-1.040528 | \n", "0.460642 | \n", "-1.135483 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 57 | \n", "3547.892992 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.967412 | \n", "0.005662 | \n", "-0.117634 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 62 | \n", "5372.087702 | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.988433 | \n", "-0.752638 | \n", "1.191028 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 65 | \n", "4263.232169 | \n", "[{'index': 2, 'value': 1.0}] | \n", "1.756148 | \n", "1.370601 | \n", "0.318587 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 67 | \n", "5234.45894 | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.677691 | \n", "-1.359277 | \n", "1.045621 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 75 | \n", "3979.314516 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-1.113644 | \n", "1.421155 | \n", "-0.771966 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 81 | \n", "3481.331391 | \n", "[{'index': 2, 'value': 1.0}] | \n", "0.677691 | \n", "0.561748 | \n", "-0.408448 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 89 | \n", "3915.240555 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.857739 | \n", "0.713408 | \n", "-0.771966 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 92 | \n", "3425.563946 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.802902 | \n", "0.308982 | \n", "-0.917372 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 93 | \n", "4141.497717 | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.309371 | \n", "1.168388 | \n", "-0.263041 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 96 | \n", "3394.72289 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.309371 | \n", "0.662855 | \n", "-1.499 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 100 | \n", "3507.226918 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.912576 | \n", "0.814515 | \n", "-0.771966 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 101 | \n", "4922.286202 | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.549739 | \n", "-1.308724 | \n", "1.554546 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 102 | \n", "4016.243221 | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.126582 | \n", "0.662855 | \n", "-0.626559 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 107 | \n", "4933.655362 | \n", "[{'index': 1, 'value': 1.0}] | \n", "1.20778 | \n", "-1.005404 | \n", "1.118325 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
25 rows × 7 columns
\n", "| \n", " | CENTROID_ID | \n", "NEAREST_CENTROIDS_DISTANCE | \n", "onehotencoded_island | \n", "standard_scaled_culmen_length_mm | \n", "standard_scaled_culmen_depth_mm | \n", "standard_scaled_flipper_length_mm | \n", "onehotencoded_sex | \n", "onehotencoded_species | \n", "
|---|---|---|---|---|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1 | \n", "3 | \n", "[{'CENTROID_ID': 3, 'DISTANCE': 0.857057881337... | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.220718 | \n", "-1.359277 | \n", "1.045621 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 15 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 1.181613302004... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.510439 | \n", "0.157322 | \n", "-0.771966 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 28 | \n", "1 | \n", "[{'CENTROID_ID': 1, 'DISTANCE': 1.006856853050... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-1.058807 | \n", "0.713408 | \n", "-0.771966 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 32 | \n", "2 | \n", "[{'CENTROID_ID': 2, 'DISTANCE': 1.237504384283... | \n", "[{'index': 2, 'value': 1.0}] | \n", "1.463685 | \n", "1.168388 | \n", "0.39129 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 33 | \n", "2 | \n", "[{'CENTROID_ID': 2, 'DISTANCE': 1.656439702919... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.254534 | \n", "0.056215 | \n", "-0.990076 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 34 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 1.343792119214... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.510439 | \n", "0.460642 | \n", "0.318587 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 37 | \n", "2 | \n", "[{'CENTROID_ID': 2, 'DISTANCE': 0.816670297369... | \n", "[{'index': 2, 'value': 1.0}] | \n", "1.354012 | \n", "0.511195 | \n", "-0.263041 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 41 | \n", "1 | \n", "[{'CENTROID_ID': 1, 'DISTANCE': 1.317560921596... | \n", "[{'index': 3, 'value': 1.0}] | \n", "-0.674949 | \n", "-0.095445 | \n", "-1.789814 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 47 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 1.135112005343... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-1.168481 | \n", "0.662855 | \n", "-0.117634 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 52 | \n", "2 | \n", "[{'CENTROID_ID': 2, 'DISTANCE': 1.004096945181... | \n", "[{'index': 2, 'value': 1.0}] | \n", "0.458344 | \n", "0.308982 | \n", "-0.699262 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 56 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 1.218648668822... | \n", "[{'index': 1, 'value': 1.0}] | \n", "-1.040528 | \n", "0.460642 | \n", "-1.135483 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 57 | \n", "1 | \n", "[{'CENTROID_ID': 1, 'DISTANCE': 1.238466630273... | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.967412 | \n", "0.005662 | \n", "-0.117634 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 62 | \n", "3 | \n", "[{'CENTROID_ID': 3, 'DISTANCE': 0.876984617451... | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.988433 | \n", "-0.752638 | \n", "1.191028 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 65 | \n", "2 | \n", "[{'CENTROID_ID': 2, 'DISTANCE': 1.439604004538... | \n", "[{'index': 2, 'value': 1.0}] | \n", "1.756148 | \n", "1.370601 | \n", "0.318587 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 67 | \n", "3 | \n", "[{'CENTROID_ID': 3, 'DISTANCE': 0.763112987694... | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.677691 | \n", "-1.359277 | \n", "1.045621 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 75 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 1.075788925734... | \n", "[{'index': 1, 'value': 1.0}] | \n", "-1.113644 | \n", "1.421155 | \n", "-0.771966 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 81 | \n", "2 | \n", "[{'CENTROID_ID': 2, 'DISTANCE': 0.777307801541... | \n", "[{'index': 2, 'value': 1.0}] | \n", "0.677691 | \n", "0.561748 | \n", "-0.408448 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 2, 'value': 1.0}] | \n", "
| 89 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 0.891303183824... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.857739 | \n", "0.713408 | \n", "-0.771966 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 92 | \n", "1 | \n", "[{'CENTROID_ID': 1, 'DISTANCE': 0.934676470689... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.802902 | \n", "0.308982 | \n", "-0.917372 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 93 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 0.984620018517... | \n", "[{'index': 1, 'value': 1.0}] | \n", "-0.309371 | \n", "1.168388 | \n", "-0.263041 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 96 | \n", "1 | \n", "[{'CENTROID_ID': 1, 'DISTANCE': 1.446939975674... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.309371 | \n", "0.662855 | \n", "-1.499 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 100 | \n", "1 | \n", "[{'CENTROID_ID': 1, 'DISTANCE': 1.101117711572... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.912576 | \n", "0.814515 | \n", "-0.771966 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 101 | \n", "3 | \n", "[{'CENTROID_ID': 3, 'DISTANCE': 0.823832007899... | \n", "[{'index': 1, 'value': 1.0}] | \n", "0.549739 | \n", "-1.308724 | \n", "1.554546 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
| 102 | \n", "4 | \n", "[{'CENTROID_ID': 4, 'DISTANCE': 0.995348310182... | \n", "[{'index': 2, 'value': 1.0}] | \n", "-0.126582 | \n", "0.662855 | \n", "-0.626559 | \n", "[{'index': 3, 'value': 1.0}] | \n", "[{'index': 1, 'value': 1.0}] | \n", "
| 107 | \n", "3 | \n", "[{'CENTROID_ID': 3, 'DISTANCE': 0.930021405831... | \n", "[{'index': 1, 'value': 1.0}] | \n", "1.20778 | \n", "-1.005404 | \n", "1.118325 | \n", "[{'index': 2, 'value': 1.0}] | \n", "[{'index': 3, 'value': 1.0}] | \n", "
25 rows × 8 columns
\n", "| \n", " | predicted_body_mass_g | \n", "island | \n", "culmen_length_mm | \n", "culmen_depth_mm | \n", "flipper_length_mm | \n", "sex | \n", "species | \n", "
|---|---|---|---|---|---|---|---|
| penguin_id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 1 | \n", "4772.374547 | \n", "Biscoe | \n", "45.1 | \n", "14.5 | \n", "215.0 | \n", "FEMALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
| 15 | \n", "3883.371052 | \n", "Dream | \n", "41.1 | \n", "17.5 | \n", "190.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 28 | \n", "3479.706166 | \n", "Dream | \n", "38.1 | \n", "18.6 | \n", "190.0 | \n", "FEMALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 32 | \n", "4223.851137 | \n", "Dream | \n", "51.9 | \n", "19.5 | \n", "206.0 | \n", "MALE | \n", "Chinstrap penguin (Pygoscelis antarctica) | \n", "
| 33 | \n", "3197.620461 | \n", "Dream | \n", "42.5 | \n", "17.3 | \n", "187.0 | \n", "FEMALE | \n", "Chinstrap penguin (Pygoscelis antarctica) | \n", "
| 34 | \n", "4155.265191 | \n", "Dream | \n", "41.1 | \n", "18.1 | \n", "205.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 37 | \n", "3991.311319 | \n", "Dream | \n", "51.3 | \n", "18.2 | \n", "197.0 | \n", "MALE | \n", "Chinstrap penguin (Pygoscelis antarctica) | \n", "
| 41 | \n", "3232.644783 | \n", "Torgersen | \n", "40.2 | \n", "17.0 | \n", "176.0 | \n", "FEMALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 47 | \n", "4017.738303 | \n", "Dream | \n", "37.5 | \n", "18.5 | \n", "199.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 52 | \n", "3365.077659 | \n", "Dream | \n", "46.4 | \n", "17.8 | \n", "191.0 | \n", "FEMALE | \n", "Chinstrap penguin (Pygoscelis antarctica) | \n", "
| 56 | \n", "3791.328893 | \n", "Biscoe | \n", "38.2 | \n", "18.1 | \n", "185.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 57 | \n", "3547.890609 | \n", "Biscoe | \n", "38.6 | \n", "17.2 | \n", "199.0 | \n", "FEMALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 62 | \n", "5372.086117 | \n", "Biscoe | \n", "49.3 | \n", "15.7 | \n", "217.0 | \n", "MALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
| 65 | \n", "4263.229571 | \n", "Dream | \n", "53.5 | \n", "19.9 | \n", "205.0 | \n", "MALE | \n", "Chinstrap penguin (Pygoscelis antarctica) | \n", "
| 67 | \n", "5234.457401 | \n", "Biscoe | \n", "47.6 | \n", "14.5 | \n", "215.0 | \n", "MALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
| 75 | \n", "3979.311469 | \n", "Biscoe | \n", "37.8 | \n", "20.0 | \n", "190.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 81 | \n", "3481.328573 | \n", "Dream | \n", "47.6 | \n", "18.3 | \n", "195.0 | \n", "FEMALE | \n", "Chinstrap penguin (Pygoscelis antarctica) | \n", "
| 89 | \n", "3915.237615 | \n", "Dream | \n", "39.2 | \n", "18.6 | \n", "190.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 92 | \n", "3425.560982 | \n", "Dream | \n", "39.5 | \n", "17.8 | \n", "188.0 | \n", "FEMALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 93 | \n", "4141.494969 | \n", "Biscoe | \n", "42.2 | \n", "19.5 | \n", "197.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 96 | \n", "3394.719445 | \n", "Dream | \n", "42.2 | \n", "18.5 | \n", "180.0 | \n", "FEMALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 100 | \n", "3507.223965 | \n", "Dream | \n", "38.9 | \n", "18.8 | \n", "190.0 | \n", "FEMALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 101 | \n", "4922.284991 | \n", "Biscoe | \n", "46.9 | \n", "14.6 | \n", "222.0 | \n", "FEMALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
| 102 | \n", "4016.240318 | \n", "Dream | \n", "43.2 | \n", "18.5 | \n", "192.0 | \n", "MALE | \n", "Adelie Penguin (Pygoscelis adeliae) | \n", "
| 107 | \n", "4933.653758 | \n", "Biscoe | \n", "50.5 | \n", "15.2 | \n", "216.0 | \n", "FEMALE | \n", "Gentoo penguin (Pygoscelis papua) | \n", "
25 rows × 7 columns
\n", "| \n", " | mean_absolute_error | \n", "mean_squared_error | \n", "mean_squared_log_error | \n", "median_absolute_error | \n", "r2_score | \n", "explained_variance | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "225.883512 | \n", "77765.989281 | \n", "0.004457 | \n", "179.548041 | \n", "0.873166 | \n", "0.873315 | \n", "
1 rows × 6 columns
\n", "