{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<!---\n",
    "https://pbpython.com/categorical-encoding.html\n",
    "--->"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Aus der [Pandas Dokumentation, Abschnitt \"Comparison with SQL\"](https://pandas.pydata.org/docs/getting_started/comparison/comparison_with_sql.html)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     total_bill   tip     sex smoker   day    time  size\n",
      "0         16.99  1.01  Female     No   Sun  Dinner     2\n",
      "1         10.34  1.66    Male     No   Sun  Dinner     3\n",
      "2         21.01  3.50    Male     No   Sun  Dinner     3\n",
      "3         23.68  3.31    Male     No   Sun  Dinner     2\n",
      "4         24.59  3.61  Female     No   Sun  Dinner     4\n",
      "..          ...   ...     ...    ...   ...     ...   ...\n",
      "239       29.03  5.92    Male     No   Sat  Dinner     3\n",
      "240       27.18  2.00  Female    Yes   Sat  Dinner     2\n",
      "241       22.67  2.00    Male    Yes   Sat  Dinner     2\n",
      "242       17.82  1.75    Male     No   Sat  Dinner     2\n",
      "243       18.78  3.00  Female     No  Thur  Dinner     2\n",
      "\n",
      "[244 rows x 7 columns]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "url = (\n",
    "    \"https://raw.github.com/pandas-dev\"\n",
    "    \"/pandas/main/pandas/tests/io/data/csv/tips.csv\"\n",
    ")\n",
    "\n",
    "tips = pd.read_csv(url)\n",
    "print(tips)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Operationen auf Pandas DataFrames erzeugen Kopien, während der ursprüngliche DataFrame unverändert bleibt."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     total_bill   tip     sex smoker   day    time  size\n",
      "0         16.99  1.01  Female     No   Sun  Dinner     2\n",
      "1         10.34  1.66    Male     No   Sun  Dinner     3\n",
      "2         21.01  3.50    Male     No   Sun  Dinner     3\n",
      "3         23.68  3.31    Male     No   Sun  Dinner     2\n",
      "4         24.59  3.61  Female     No   Sun  Dinner     4\n",
      "..          ...   ...     ...    ...   ...     ...   ...\n",
      "239       29.03  5.92    Male     No   Sat  Dinner     3\n",
      "240       27.18  2.00  Female    Yes   Sat  Dinner     2\n",
      "241       22.67  2.00    Male    Yes   Sat  Dinner     2\n",
      "242       17.82  1.75    Male     No   Sat  Dinner     2\n",
      "243       18.78  3.00  Female     No  Thur  Dinner     2\n",
      "\n",
      "[244 rows x 7 columns]\n",
      "\n",
      "Das ist ja gar nicht sortiert! Das nächste aber:\n",
      "\n",
      "     total_bill    tip     sex smoker   day    time  size\n",
      "67         3.07   1.00  Female    Yes   Sat  Dinner     1\n",
      "92         5.75   1.00  Female    Yes   Fri  Dinner     2\n",
      "111        7.25   1.00  Female     No   Sat  Dinner     1\n",
      "172        7.25   5.15    Male    Yes   Sun  Dinner     2\n",
      "149        7.51   2.00    Male     No  Thur   Lunch     2\n",
      "..          ...    ...     ...    ...   ...     ...   ...\n",
      "182       45.35   3.50    Male    Yes   Sun  Dinner     3\n",
      "156       48.17   5.00    Male     No   Sun  Dinner     6\n",
      "59        48.27   6.73    Male     No   Sat  Dinner     4\n",
      "212       48.33   9.00    Male     No   Sat  Dinner     4\n",
      "170       50.81  10.00    Male    Yes   Sat  Dinner     3\n",
      "\n",
      "[244 rows x 7 columns]\n"
     ]
    }
   ],
   "source": [
    "tips.sort_values(\"total_bill\")\n",
    "print(tips)\n",
    "print(\"\\nDas ist ja gar nicht sortiert! Das nächste aber:\\n\")\n",
    "sorted_tips = tips.sort_values(\"total_bill\")\n",
    "print(sorted_tips)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Spalten auswählen ist `SELECT`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>smoker</th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>29.03</td>\n",
       "      <td>5.92</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>27.18</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>241</th>\n",
       "      <td>22.67</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>17.82</td>\n",
       "      <td>1.75</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>243</th>\n",
       "      <td>18.78</td>\n",
       "      <td>3.00</td>\n",
       "      <td>No</td>\n",
       "      <td>Dinner</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>244 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip smoker    time\n",
       "0         16.99  1.01     No  Dinner\n",
       "1         10.34  1.66     No  Dinner\n",
       "2         21.01  3.50     No  Dinner\n",
       "3         23.68  3.31     No  Dinner\n",
       "4         24.59  3.61     No  Dinner\n",
       "..          ...   ...    ...     ...\n",
       "239       29.03  5.92     No  Dinner\n",
       "240       27.18  2.00    Yes  Dinner\n",
       "241       22.67  2.00    Yes  Dinner\n",
       "242       17.82  1.75     No  Dinner\n",
       "243       18.78  3.00     No  Dinner\n",
       "\n",
       "[244 rows x 4 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT total_bill, tip, smoker, time FROM tips;\n",
    "tips[[\"total_bill\", \"tip\", \"smoker\", \"time\"]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Man kann neue Spalten hinzufügen, die sich aus alten Spalten berechnen:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "      <th>tip_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.059447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.160542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.166587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.139780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "      <td>0.146808</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>29.03</td>\n",
       "      <td>5.92</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "      <td>0.203927</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>27.18</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.073584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>241</th>\n",
       "      <td>22.67</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.088222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>17.82</td>\n",
       "      <td>1.75</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.098204</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>243</th>\n",
       "      <td>18.78</td>\n",
       "      <td>3.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "      <td>0.159744</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>244 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip     sex smoker   day    time  size  tip_rate\n",
       "0         16.99  1.01  Female     No   Sun  Dinner     2  0.059447\n",
       "1         10.34  1.66    Male     No   Sun  Dinner     3  0.160542\n",
       "2         21.01  3.50    Male     No   Sun  Dinner     3  0.166587\n",
       "3         23.68  3.31    Male     No   Sun  Dinner     2  0.139780\n",
       "4         24.59  3.61  Female     No   Sun  Dinner     4  0.146808\n",
       "..          ...   ...     ...    ...   ...     ...   ...       ...\n",
       "239       29.03  5.92    Male     No   Sat  Dinner     3  0.203927\n",
       "240       27.18  2.00  Female    Yes   Sat  Dinner     2  0.073584\n",
       "241       22.67  2.00    Male    Yes   Sat  Dinner     2  0.088222\n",
       "242       17.82  1.75    Male     No   Sat  Dinner     2  0.098204\n",
       "243       18.78  3.00  Female     No  Thur  Dinner     2  0.159744\n",
       "\n",
       "[244 rows x 8 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT *, tip/total_bill as tip_rate FROM tips;\n",
    "tips.assign(tip_rate=tips[\"tip\"] / tips[\"total_bill\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Daten filtern (nach Bedingungen Zeilen auswählen) mit `WHERE`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10.34</td>\n",
       "      <td>1.66</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21.01</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.68</td>\n",
       "      <td>3.31</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24.59</td>\n",
       "      <td>3.61</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>29.03</td>\n",
       "      <td>5.92</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>27.18</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>241</th>\n",
       "      <td>22.67</td>\n",
       "      <td>2.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>17.82</td>\n",
       "      <td>1.75</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>243</th>\n",
       "      <td>18.78</td>\n",
       "      <td>3.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>176 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip     sex smoker   day    time  size\n",
       "0         16.99  1.01  Female     No   Sun  Dinner     2\n",
       "1         10.34  1.66    Male     No   Sun  Dinner     3\n",
       "2         21.01  3.50    Male     No   Sun  Dinner     3\n",
       "3         23.68  3.31    Male     No   Sun  Dinner     2\n",
       "4         24.59  3.61  Female     No   Sun  Dinner     4\n",
       "..          ...   ...     ...    ...   ...     ...   ...\n",
       "239       29.03  5.92    Male     No   Sat  Dinner     3\n",
       "240       27.18  2.00  Female    Yes   Sat  Dinner     2\n",
       "241       22.67  2.00    Male    Yes   Sat  Dinner     2\n",
       "242       17.82  1.75    Male     No   Sat  Dinner     2\n",
       "243       18.78  3.00  Female     No  Thur  Dinner     2\n",
       "\n",
       "[176 rows x 7 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT * FROM tips WHERE time = 'Dinner';\n",
    "is_dinner = tips[\"time\"] == \"Dinner\" # Bitmaske\n",
    "tips[is_dinner]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Mehrere Bedingungen logisch verknüpfen mit `OR |` oder `AND &`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>39.42</td>\n",
       "      <td>7.58</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>48.27</td>\n",
       "      <td>6.73</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>50.81</td>\n",
       "      <td>10.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>23.17</td>\n",
       "      <td>6.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>48.33</td>\n",
       "      <td>9.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>28.17</td>\n",
       "      <td>6.50</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill    tip     sex smoker  day    time  size\n",
       "23        39.42   7.58    Male     No  Sat  Dinner     4\n",
       "59        48.27   6.73    Male     No  Sat  Dinner     4\n",
       "170       50.81  10.00    Male    Yes  Sat  Dinner     3\n",
       "183       23.17   6.50    Male    Yes  Sun  Dinner     4\n",
       "212       48.33   9.00    Male     No  Sat  Dinner     4\n",
       "214       28.17   6.50  Female    Yes  Sat  Dinner     3"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT * FROM tips WHERE time = 'Dinner' AND tip > 6.00;\n",
    "tips[(tips[\"time\"] == \"Dinner\") & (tips[\"tip\"] > 6.00)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>48.27</td>\n",
       "      <td>6.73</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>29.80</td>\n",
       "      <td>4.20</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>34.30</td>\n",
       "      <td>6.70</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>143</th>\n",
       "      <td>27.05</td>\n",
       "      <td>5.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>48.17</td>\n",
       "      <td>5.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>50.81</td>\n",
       "      <td>10.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>182</th>\n",
       "      <td>45.35</td>\n",
       "      <td>3.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>48.33</td>\n",
       "      <td>9.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill    tip     sex smoker   day    time  size\n",
       "59        48.27   6.73    Male     No   Sat  Dinner     4\n",
       "125       29.80   4.20  Female     No  Thur   Lunch     6\n",
       "141       34.30   6.70    Male     No  Thur   Lunch     6\n",
       "143       27.05   5.00  Female     No  Thur   Lunch     6\n",
       "156       48.17   5.00    Male     No   Sun  Dinner     6\n",
       "170       50.81  10.00    Male    Yes   Sat  Dinner     3\n",
       "182       45.35   3.50    Male    Yes   Sun  Dinner     3\n",
       "212       48.33   9.00    Male     No   Sat  Dinner     4"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT * FROM tips WHERE size >= 6 OR total_bill > 45;\n",
    "tips[(tips[\"size\"] >= 6) | (tips[\"total_bill\"] > 45)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Daten in Gruppen aufspalten, aggregierte Statistiken ausrechnen, Gruppen zusammenführen - das ist in SQL ein `GROUP BY`, in Pandas `groupby`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sex\n",
       "Female     87\n",
       "Male      157\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT sex, count(*) FROM tips GROUP BY sex;\n",
    "tips.groupby(\"sex\").size()\n",
    "# tips.groupby(\"sex\").count() wäre falsch, das zählt nicht-NA-Einträge!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tip</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Fri</th>\n",
       "      <td>2.734737</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>2.993103</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>3.255132</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>2.771452</td>\n",
       "      <td>62</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           tip  day\n",
       "day                \n",
       "Fri   2.734737   19\n",
       "Sat   2.993103   87\n",
       "Sun   3.255132   76\n",
       "Thur  2.771452   62"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT day, AVG(tip), COUNT(*) FROM tips GROUP BY day;\n",
    "tips.groupby(\"day\").agg({\"tip\": \"mean\", \"day\": np.size})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">tip</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>size</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">No</th>\n",
       "      <th>Fri</th>\n",
       "      <td>4</td>\n",
       "      <td>2.812500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>45</td>\n",
       "      <td>3.102889</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>57</td>\n",
       "      <td>3.167895</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>45</td>\n",
       "      <td>2.673778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">Yes</th>\n",
       "      <th>Fri</th>\n",
       "      <td>15</td>\n",
       "      <td>2.714000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sat</th>\n",
       "      <td>42</td>\n",
       "      <td>2.875476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sun</th>\n",
       "      <td>19</td>\n",
       "      <td>3.516842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Thur</th>\n",
       "      <td>17</td>\n",
       "      <td>3.030000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             tip          \n",
       "            size      mean\n",
       "smoker day                \n",
       "No     Fri     4  2.812500\n",
       "       Sat    45  3.102889\n",
       "       Sun    57  3.167895\n",
       "       Thur   45  2.673778\n",
       "Yes    Fri    15  2.714000\n",
       "       Sat    42  2.875476\n",
       "       Sun    19  3.516842\n",
       "       Thur   17  3.030000"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT smoker, day, COUNT(*), AVG(tip) FROM tips GROUP BY smoker, day;\n",
    "tips.groupby([\"smoker\", \"day\"]).agg({\"tip\": [np.size, \"mean\"]})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Tabellen verknüpfen mit `JOIN` bzw. `merge`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  key     value\n",
      "0   A -0.160480\n",
      "1   B  0.507701\n",
      "2   C -0.819206\n",
      "3   D -1.768110 \n",
      "   key     value\n",
      "0   B  0.790226\n",
      "1   D -1.259556\n",
      "2   D -0.221251\n",
      "3   E -1.401175\n"
     ]
    }
   ],
   "source": [
    "df1 = pd.DataFrame({\"key\": [\"A\", \"B\", \"C\", \"D\"], \"value\": np.random.randn(4)})\n",
    "df2 = pd.DataFrame({\"key\": [\"B\", \"D\", \"D\", \"E\"], \"value\": np.random.randn(4)})\n",
    "print(df1, \"\\n\", df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>value_x</th>\n",
       "      <th>value_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B</td>\n",
       "      <td>0.507701</td>\n",
       "      <td>0.790226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-1.259556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-0.221251</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key   value_x   value_y\n",
       "0   B  0.507701  0.790226\n",
       "1   D -1.768110 -1.259556\n",
       "2   D -1.768110 -0.221251"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT * FROM df1 INNER JOIN df2 ON df1.key = df2.key;\n",
    "pd.merge(df1, df2, on=\"key\") # inner join ist der default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>value_x</th>\n",
       "      <th>value_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>-0.160480</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>0.507701</td>\n",
       "      <td>0.790226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>-0.819206</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-1.259556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-0.221251</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key   value_x   value_y\n",
       "0   A -0.160480       NaN\n",
       "1   B  0.507701  0.790226\n",
       "2   C -0.819206       NaN\n",
       "3   D -1.768110 -1.259556\n",
       "4   D -1.768110 -0.221251"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT * FROM df1 LEFT OUTER JOIN df2 ON df1.key = df2.key;\n",
    "pd.merge(df1, df2, on=\"key\", how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>value_x</th>\n",
       "      <th>value_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B</td>\n",
       "      <td>0.507701</td>\n",
       "      <td>0.790226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-1.259556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-0.221251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.401175</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key   value_x   value_y\n",
       "0   B  0.507701  0.790226\n",
       "1   D -1.768110 -1.259556\n",
       "2   D -1.768110 -0.221251\n",
       "3   E       NaN -1.401175"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT * FROM df1 RIGHT OUTER JOIN df2 ON df1.key = df2.key;\n",
    "pd.merge(df1, df2, on=\"key\", how=\"right\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>value_x</th>\n",
       "      <th>value_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>-0.160480</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>0.507701</td>\n",
       "      <td>0.790226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>-0.819206</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-1.259556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>D</td>\n",
       "      <td>-1.768110</td>\n",
       "      <td>-0.221251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.401175</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key   value_x   value_y\n",
       "0   A -0.160480       NaN\n",
       "1   B  0.507701  0.790226\n",
       "2   C -0.819206       NaN\n",
       "3   D -1.768110 -1.259556\n",
       "4   D -1.768110 -0.221251\n",
       "5   E       NaN -1.401175"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# SELECT * FROM df1 FULL OUTER JOIN df2 ON df1.key = df2.key;\n",
    "pd.merge(df1, df2, on=\"key\", how=\"outer\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Daten verändern geht in SQL mit `UPDATE`, in Pandas führen wir einfach Python Code aus, als wenn wir eine einzelne Variable ändern wollten:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>3.07</td>\n",
       "      <td>1.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>236</th>\n",
       "      <td>12.60</td>\n",
       "      <td>1.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>5.75</td>\n",
       "      <td>1.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Fri</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>7.25</td>\n",
       "      <td>1.00</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.99</td>\n",
       "      <td>1.01</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip     sex smoker  day    time  size\n",
       "67         3.07  1.00  Female    Yes  Sat  Dinner     1\n",
       "236       12.60  1.00    Male    Yes  Sat  Dinner     2\n",
       "92         5.75  1.00  Female    Yes  Fri  Dinner     2\n",
       "111        7.25  1.00  Female     No  Sat  Dinner     1\n",
       "0         16.99  1.01  Female     No  Sun  Dinner     2"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Zustand vor der Änderung:\n",
    "tips.sort_values(\"tip\").head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>3.07</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159</th>\n",
       "      <td>16.49</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>16.21</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>13.81</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>163</th>\n",
       "      <td>13.81</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill  tip     sex smoker  day    time  size\n",
       "67         3.07  2.0  Female    Yes  Sat  Dinner     1\n",
       "159       16.49  2.0    Male     No  Sun  Dinner     4\n",
       "162       16.21  2.0  Female     No  Sun  Dinner     3\n",
       "61        13.81  2.0    Male    Yes  Sat  Dinner     2\n",
       "163       13.81  2.0    Male     No  Sun  Dinner     2"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# UPDATE tips SET tip = tip*2 WHERE tip < 2;\n",
    "tips.loc[tips[\"tip\"] < 2, \"tip\"] *= 2\n",
    "tips.sort_values(\"tip\").head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>50.81</td>\n",
       "      <td>10.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>48.33</td>\n",
       "      <td>9.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>39.42</td>\n",
       "      <td>7.58</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>48.27</td>\n",
       "      <td>6.73</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>34.30</td>\n",
       "      <td>6.70</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill    tip   sex smoker   day    time  size\n",
       "170       50.81  10.00  Male    Yes   Sat  Dinner     3\n",
       "212       48.33   9.00  Male     No   Sat  Dinner     4\n",
       "23        39.42   7.58  Male     No   Sat  Dinner     4\n",
       "59        48.27   6.73  Male     No   Sat  Dinner     4\n",
       "141       34.30   6.70  Male     No  Thur   Lunch     6"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Zustand vor der nächsten Änderung:\n",
    "tips.sort_values(\"tip\", ascending=False).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>total_bill</th>\n",
       "      <th>tip</th>\n",
       "      <th>sex</th>\n",
       "      <th>smoker</th>\n",
       "      <th>day</th>\n",
       "      <th>time</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>48.33</td>\n",
       "      <td>9.00</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>39.42</td>\n",
       "      <td>7.58</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>48.27</td>\n",
       "      <td>6.73</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Sat</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>141</th>\n",
       "      <td>34.30</td>\n",
       "      <td>6.70</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Thur</td>\n",
       "      <td>Lunch</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>23.17</td>\n",
       "      <td>6.50</td>\n",
       "      <td>Male</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Sun</td>\n",
       "      <td>Dinner</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     total_bill   tip   sex smoker   day    time  size\n",
       "212       48.33  9.00  Male     No   Sat  Dinner     4\n",
       "23        39.42  7.58  Male     No   Sat  Dinner     4\n",
       "59        48.27  6.73  Male     No   Sat  Dinner     4\n",
       "141       34.30  6.70  Male     No  Thur   Lunch     6\n",
       "183       23.17  6.50  Male    Yes   Sun  Dinner     4"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DELETE FROM tips WHERE tip > 9;\n",
    "tips = tips.loc[tips[\"tip\"] <= 9]\n",
    "tips.sort_values(\"tip\", ascending=False).head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}