{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pandas" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<!---\n", "https://pbpython.com/categorical-encoding.html\n", "--->" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Aus der [Pandas Dokumentation, Abschnitt \"Comparison with SQL\"](https://pandas.pydata.org/docs/getting_started/comparison/comparison_with_sql.html)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " total_bill tip sex smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4\n", ".. ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "240 27.18 2.00 Female Yes Sat Dinner 2\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "242 17.82 1.75 Male No Sat Dinner 2\n", "243 18.78 3.00 Female No Thur Dinner 2\n", "\n", "[244 rows x 7 columns]\n" ] } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "url = (\n", " \"https://raw.github.com/pandas-dev\"\n", " \"/pandas/main/pandas/tests/io/data/csv/tips.csv\"\n", ")\n", "\n", "tips = pd.read_csv(url)\n", "print(tips)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Operationen auf Pandas DataFrames erzeugen Kopien, während der ursprüngliche DataFrame unverändert bleibt." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " total_bill tip sex smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4\n", ".. ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "240 27.18 2.00 Female Yes Sat Dinner 2\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "242 17.82 1.75 Male No Sat Dinner 2\n", "243 18.78 3.00 Female No Thur Dinner 2\n", "\n", "[244 rows x 7 columns]\n", "\n", "Das ist ja gar nicht sortiert! Das nächste aber:\n", "\n", " total_bill tip sex smoker day time size\n", "67 3.07 1.00 Female Yes Sat Dinner 1\n", "92 5.75 1.00 Female Yes Fri Dinner 2\n", "111 7.25 1.00 Female No Sat Dinner 1\n", "172 7.25 5.15 Male Yes Sun Dinner 2\n", "149 7.51 2.00 Male No Thur Lunch 2\n", ".. ... ... ... ... ... ... ...\n", "182 45.35 3.50 Male Yes Sun Dinner 3\n", "156 48.17 5.00 Male No Sun Dinner 6\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "212 48.33 9.00 Male No Sat Dinner 4\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "\n", "[244 rows x 7 columns]\n" ] } ], "source": [ "tips.sort_values(\"total_bill\")\n", "print(tips)\n", "print(\"\\nDas ist ja gar nicht sortiert! Das nächste aber:\\n\")\n", "sorted_tips = tips.sort_values(\"total_bill\")\n", "print(sorted_tips)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Spalten auswählen ist `SELECT`:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>smoker</th>\n", " <th>time</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>16.99</td>\n", " <td>1.01</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>10.34</td>\n", " <td>1.66</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>21.01</td>\n", " <td>3.50</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>23.68</td>\n", " <td>3.31</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>24.59</td>\n", " <td>3.61</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>239</th>\n", " <td>29.03</td>\n", " <td>5.92</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>240</th>\n", " <td>27.18</td>\n", " <td>2.00</td>\n", " <td>Yes</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>241</th>\n", " <td>22.67</td>\n", " <td>2.00</td>\n", " <td>Yes</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>242</th>\n", " <td>17.82</td>\n", " <td>1.75</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " <tr>\n", " <th>243</th>\n", " <td>18.78</td>\n", " <td>3.00</td>\n", " <td>No</td>\n", " <td>Dinner</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>244 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " total_bill tip smoker time\n", "0 16.99 1.01 No Dinner\n", "1 10.34 1.66 No Dinner\n", "2 21.01 3.50 No Dinner\n", "3 23.68 3.31 No Dinner\n", "4 24.59 3.61 No Dinner\n", ".. ... ... ... ...\n", "239 29.03 5.92 No Dinner\n", "240 27.18 2.00 Yes Dinner\n", "241 22.67 2.00 Yes Dinner\n", "242 17.82 1.75 No Dinner\n", "243 18.78 3.00 No Dinner\n", "\n", "[244 rows x 4 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT total_bill, tip, smoker, time FROM tips;\n", "tips[[\"total_bill\", \"tip\", \"smoker\", \"time\"]]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Man kann neue Spalten hinzufügen, die sich aus alten Spalten berechnen:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " <th>tip_rate</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>16.99</td>\n", " <td>1.01</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " <td>0.059447</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>10.34</td>\n", " <td>1.66</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " <td>0.160542</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>21.01</td>\n", " <td>3.50</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " <td>0.166587</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>23.68</td>\n", " <td>3.31</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " <td>0.139780</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>24.59</td>\n", " <td>3.61</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " <td>0.146808</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>239</th>\n", " <td>29.03</td>\n", " <td>5.92</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " <td>0.203927</td>\n", " </tr>\n", " <tr>\n", " <th>240</th>\n", " <td>27.18</td>\n", " <td>2.00</td>\n", " <td>Female</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " <td>0.073584</td>\n", " </tr>\n", " <tr>\n", " <th>241</th>\n", " <td>22.67</td>\n", " <td>2.00</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " <td>0.088222</td>\n", " </tr>\n", " <tr>\n", " <th>242</th>\n", " <td>17.82</td>\n", " <td>1.75</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " <td>0.098204</td>\n", " </tr>\n", " <tr>\n", " <th>243</th>\n", " <td>18.78</td>\n", " <td>3.00</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Thur</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " <td>0.159744</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>244 rows × 8 columns</p>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size tip_rate\n", "0 16.99 1.01 Female No Sun Dinner 2 0.059447\n", "1 10.34 1.66 Male No Sun Dinner 3 0.160542\n", "2 21.01 3.50 Male No Sun Dinner 3 0.166587\n", "3 23.68 3.31 Male No Sun Dinner 2 0.139780\n", "4 24.59 3.61 Female No Sun Dinner 4 0.146808\n", ".. ... ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3 0.203927\n", "240 27.18 2.00 Female Yes Sat Dinner 2 0.073584\n", "241 22.67 2.00 Male Yes Sat Dinner 2 0.088222\n", "242 17.82 1.75 Male No Sat Dinner 2 0.098204\n", "243 18.78 3.00 Female No Thur Dinner 2 0.159744\n", "\n", "[244 rows x 8 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT *, tip/total_bill as tip_rate FROM tips;\n", "tips.assign(tip_rate=tips[\"tip\"] / tips[\"total_bill\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Daten filtern (nach Bedingungen Zeilen auswählen) mit `WHERE`" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>16.99</td>\n", " <td>1.01</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>10.34</td>\n", " <td>1.66</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>21.01</td>\n", " <td>3.50</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>23.68</td>\n", " <td>3.31</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>24.59</td>\n", " <td>3.61</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>239</th>\n", " <td>29.03</td>\n", " <td>5.92</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>240</th>\n", " <td>27.18</td>\n", " <td>2.00</td>\n", " <td>Female</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>241</th>\n", " <td>22.67</td>\n", " <td>2.00</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>242</th>\n", " <td>17.82</td>\n", " <td>1.75</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>243</th>\n", " <td>18.78</td>\n", " <td>3.00</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Thur</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>176 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "0 16.99 1.01 Female No Sun Dinner 2\n", "1 10.34 1.66 Male No Sun Dinner 3\n", "2 21.01 3.50 Male No Sun Dinner 3\n", "3 23.68 3.31 Male No Sun Dinner 2\n", "4 24.59 3.61 Female No Sun Dinner 4\n", ".. ... ... ... ... ... ... ...\n", "239 29.03 5.92 Male No Sat Dinner 3\n", "240 27.18 2.00 Female Yes Sat Dinner 2\n", "241 22.67 2.00 Male Yes Sat Dinner 2\n", "242 17.82 1.75 Male No Sat Dinner 2\n", "243 18.78 3.00 Female No Thur Dinner 2\n", "\n", "[176 rows x 7 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT * FROM tips WHERE time = 'Dinner';\n", "is_dinner = tips[\"time\"] == \"Dinner\" # Bitmaske\n", "tips[is_dinner]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Mehrere Bedingungen logisch verknüpfen mit `OR |` oder `AND &`:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>23</th>\n", " <td>39.42</td>\n", " <td>7.58</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>59</th>\n", " <td>48.27</td>\n", " <td>6.73</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>170</th>\n", " <td>50.81</td>\n", " <td>10.00</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>183</th>\n", " <td>23.17</td>\n", " <td>6.50</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>212</th>\n", " <td>48.33</td>\n", " <td>9.00</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>214</th>\n", " <td>28.17</td>\n", " <td>6.50</td>\n", " <td>Female</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "23 39.42 7.58 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "183 23.17 6.50 Male Yes Sun Dinner 4\n", "212 48.33 9.00 Male No Sat Dinner 4\n", "214 28.17 6.50 Female Yes Sat Dinner 3" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT * FROM tips WHERE time = 'Dinner' AND tip > 6.00;\n", "tips[(tips[\"time\"] == \"Dinner\") & (tips[\"tip\"] > 6.00)]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>59</th>\n", " <td>48.27</td>\n", " <td>6.73</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>125</th>\n", " <td>29.80</td>\n", " <td>4.20</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Thur</td>\n", " <td>Lunch</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>141</th>\n", " <td>34.30</td>\n", " <td>6.70</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Thur</td>\n", " <td>Lunch</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>143</th>\n", " <td>27.05</td>\n", " <td>5.00</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Thur</td>\n", " <td>Lunch</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>156</th>\n", " <td>48.17</td>\n", " <td>5.00</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>170</th>\n", " <td>50.81</td>\n", " <td>10.00</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>182</th>\n", " <td>45.35</td>\n", " <td>3.50</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>212</th>\n", " <td>48.33</td>\n", " <td>9.00</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "125 29.80 4.20 Female No Thur Lunch 6\n", "141 34.30 6.70 Male No Thur Lunch 6\n", "143 27.05 5.00 Female No Thur Lunch 6\n", "156 48.17 5.00 Male No Sun Dinner 6\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "182 45.35 3.50 Male Yes Sun Dinner 3\n", "212 48.33 9.00 Male No Sat Dinner 4" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT * FROM tips WHERE size >= 6 OR total_bill > 45;\n", "tips[(tips[\"size\"] >= 6) | (tips[\"total_bill\"] > 45)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Daten in Gruppen aufspalten, aggregierte Statistiken ausrechnen, Gruppen zusammenführen - das ist in SQL ein `GROUP BY`, in Pandas `groupby`:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "sex\n", "Female 87\n", "Male 157\n", "dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT sex, count(*) FROM tips GROUP BY sex;\n", "tips.groupby(\"sex\").size()\n", "# tips.groupby(\"sex\").count() wäre falsch, das zählt nicht-NA-Einträge!" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>tip</th>\n", " <th>day</th>\n", " </tr>\n", " <tr>\n", " <th>day</th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>Fri</th>\n", " <td>2.734737</td>\n", " <td>19</td>\n", " </tr>\n", " <tr>\n", " <th>Sat</th>\n", " <td>2.993103</td>\n", " <td>87</td>\n", " </tr>\n", " <tr>\n", " <th>Sun</th>\n", " <td>3.255132</td>\n", " <td>76</td>\n", " </tr>\n", " <tr>\n", " <th>Thur</th>\n", " <td>2.771452</td>\n", " <td>62</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " tip day\n", "day \n", "Fri 2.734737 19\n", "Sat 2.993103 87\n", "Sun 3.255132 76\n", "Thur 2.771452 62" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT day, AVG(tip), COUNT(*) FROM tips GROUP BY day;\n", "tips.groupby(\"day\").agg({\"tip\": \"mean\", \"day\": np.size})" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe thead tr:last-of-type th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th></th>\n", " <th></th>\n", " <th colspan=\"2\" halign=\"left\">tip</th>\n", " </tr>\n", " <tr>\n", " <th></th>\n", " <th></th>\n", " <th>size</th>\n", " <th>mean</th>\n", " </tr>\n", " <tr>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">No</th>\n", " <th>Fri</th>\n", " <td>4</td>\n", " <td>2.812500</td>\n", " </tr>\n", " <tr>\n", " <th>Sat</th>\n", " <td>45</td>\n", " <td>3.102889</td>\n", " </tr>\n", " <tr>\n", " <th>Sun</th>\n", " <td>57</td>\n", " <td>3.167895</td>\n", " </tr>\n", " <tr>\n", " <th>Thur</th>\n", " <td>45</td>\n", " <td>2.673778</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">Yes</th>\n", " <th>Fri</th>\n", " <td>15</td>\n", " <td>2.714000</td>\n", " </tr>\n", " <tr>\n", " <th>Sat</th>\n", " <td>42</td>\n", " <td>2.875476</td>\n", " </tr>\n", " <tr>\n", " <th>Sun</th>\n", " <td>19</td>\n", " <td>3.516842</td>\n", " </tr>\n", " <tr>\n", " <th>Thur</th>\n", " <td>17</td>\n", " <td>3.030000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " tip \n", " size mean\n", "smoker day \n", "No Fri 4 2.812500\n", " Sat 45 3.102889\n", " Sun 57 3.167895\n", " Thur 45 2.673778\n", "Yes Fri 15 2.714000\n", " Sat 42 2.875476\n", " Sun 19 3.516842\n", " Thur 17 3.030000" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT smoker, day, COUNT(*), AVG(tip) FROM tips GROUP BY smoker, day;\n", "tips.groupby([\"smoker\", \"day\"]).agg({\"tip\": [np.size, \"mean\"]})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tabellen verknüpfen mit `JOIN` bzw. `merge`:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " key value\n", "0 A -0.160480\n", "1 B 0.507701\n", "2 C -0.819206\n", "3 D -1.768110 \n", " key value\n", "0 B 0.790226\n", "1 D -1.259556\n", "2 D -0.221251\n", "3 E -1.401175\n" ] } ], "source": [ "df1 = pd.DataFrame({\"key\": [\"A\", \"B\", \"C\", \"D\"], \"value\": np.random.randn(4)})\n", "df2 = pd.DataFrame({\"key\": [\"B\", \"D\", \"D\", \"E\"], \"value\": np.random.randn(4)})\n", "print(df1, \"\\n\", df2)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>key</th>\n", " <th>value_x</th>\n", " <th>value_y</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>B</td>\n", " <td>0.507701</td>\n", " <td>0.790226</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-1.259556</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-0.221251</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " key value_x value_y\n", "0 B 0.507701 0.790226\n", "1 D -1.768110 -1.259556\n", "2 D -1.768110 -0.221251" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT * FROM df1 INNER JOIN df2 ON df1.key = df2.key;\n", "pd.merge(df1, df2, on=\"key\") # inner join ist der default" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>key</th>\n", " <th>value_x</th>\n", " <th>value_y</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>A</td>\n", " <td>-0.160480</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>B</td>\n", " <td>0.507701</td>\n", " <td>0.790226</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C</td>\n", " <td>-0.819206</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-1.259556</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-0.221251</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " key value_x value_y\n", "0 A -0.160480 NaN\n", "1 B 0.507701 0.790226\n", "2 C -0.819206 NaN\n", "3 D -1.768110 -1.259556\n", "4 D -1.768110 -0.221251" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT * FROM df1 LEFT OUTER JOIN df2 ON df1.key = df2.key;\n", "pd.merge(df1, df2, on=\"key\", how=\"left\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>key</th>\n", " <th>value_x</th>\n", " <th>value_y</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>B</td>\n", " <td>0.507701</td>\n", " <td>0.790226</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-1.259556</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-0.221251</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>E</td>\n", " <td>NaN</td>\n", " <td>-1.401175</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " key value_x value_y\n", "0 B 0.507701 0.790226\n", "1 D -1.768110 -1.259556\n", "2 D -1.768110 -0.221251\n", "3 E NaN -1.401175" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT * FROM df1 RIGHT OUTER JOIN df2 ON df1.key = df2.key;\n", "pd.merge(df1, df2, on=\"key\", how=\"right\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>key</th>\n", " <th>value_x</th>\n", " <th>value_y</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>A</td>\n", " <td>-0.160480</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>B</td>\n", " <td>0.507701</td>\n", " <td>0.790226</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>C</td>\n", " <td>-0.819206</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-1.259556</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>D</td>\n", " <td>-1.768110</td>\n", " <td>-0.221251</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>E</td>\n", " <td>NaN</td>\n", " <td>-1.401175</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " key value_x value_y\n", "0 A -0.160480 NaN\n", "1 B 0.507701 0.790226\n", "2 C -0.819206 NaN\n", "3 D -1.768110 -1.259556\n", "4 D -1.768110 -0.221251\n", "5 E NaN -1.401175" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# SELECT * FROM df1 FULL OUTER JOIN df2 ON df1.key = df2.key;\n", "pd.merge(df1, df2, on=\"key\", how=\"outer\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Daten verändern geht in SQL mit `UPDATE`, in Pandas führen wir einfach Python Code aus, als wenn wir eine einzelne Variable ändern wollten:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>67</th>\n", " <td>3.07</td>\n", " <td>1.00</td>\n", " <td>Female</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>236</th>\n", " <td>12.60</td>\n", " <td>1.00</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>92</th>\n", " <td>5.75</td>\n", " <td>1.00</td>\n", " <td>Female</td>\n", " <td>Yes</td>\n", " <td>Fri</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>111</th>\n", " <td>7.25</td>\n", " <td>1.00</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>16.99</td>\n", " <td>1.01</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "67 3.07 1.00 Female Yes Sat Dinner 1\n", "236 12.60 1.00 Male Yes Sat Dinner 2\n", "92 5.75 1.00 Female Yes Fri Dinner 2\n", "111 7.25 1.00 Female No Sat Dinner 1\n", "0 16.99 1.01 Female No Sun Dinner 2" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zustand vor der Änderung:\n", "tips.sort_values(\"tip\").head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>67</th>\n", " <td>3.07</td>\n", " <td>2.0</td>\n", " <td>Female</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>159</th>\n", " <td>16.49</td>\n", " <td>2.0</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>162</th>\n", " <td>16.21</td>\n", " <td>2.0</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>61</th>\n", " <td>13.81</td>\n", " <td>2.0</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>163</th>\n", " <td>13.81</td>\n", " <td>2.0</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>2</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "67 3.07 2.0 Female Yes Sat Dinner 1\n", "159 16.49 2.0 Male No Sun Dinner 4\n", "162 16.21 2.0 Female No Sun Dinner 3\n", "61 13.81 2.0 Male Yes Sat Dinner 2\n", "163 13.81 2.0 Male No Sun Dinner 2" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# UPDATE tips SET tip = tip*2 WHERE tip < 2;\n", "tips.loc[tips[\"tip\"] < 2, \"tip\"] *= 2\n", "tips.sort_values(\"tip\").head()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>170</th>\n", " <td>50.81</td>\n", " <td>10.00</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>212</th>\n", " <td>48.33</td>\n", " <td>9.00</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>39.42</td>\n", " <td>7.58</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>59</th>\n", " <td>48.27</td>\n", " <td>6.73</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>141</th>\n", " <td>34.30</td>\n", " <td>6.70</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Thur</td>\n", " <td>Lunch</td>\n", " <td>6</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "170 50.81 10.00 Male Yes Sat Dinner 3\n", "212 48.33 9.00 Male No Sat Dinner 4\n", "23 39.42 7.58 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "141 34.30 6.70 Male No Thur Lunch 6" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zustand vor der nächsten Änderung:\n", "tips.sort_values(\"tip\", ascending=False).head()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>total_bill</th>\n", " <th>tip</th>\n", " <th>sex</th>\n", " <th>smoker</th>\n", " <th>day</th>\n", " <th>time</th>\n", " <th>size</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>212</th>\n", " <td>48.33</td>\n", " <td>9.00</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>39.42</td>\n", " <td>7.58</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>59</th>\n", " <td>48.27</td>\n", " <td>6.73</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Sat</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>141</th>\n", " <td>34.30</td>\n", " <td>6.70</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Thur</td>\n", " <td>Lunch</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>183</th>\n", " <td>23.17</td>\n", " <td>6.50</td>\n", " <td>Male</td>\n", " <td>Yes</td>\n", " <td>Sun</td>\n", " <td>Dinner</td>\n", " <td>4</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " total_bill tip sex smoker day time size\n", "212 48.33 9.00 Male No Sat Dinner 4\n", "23 39.42 7.58 Male No Sat Dinner 4\n", "59 48.27 6.73 Male No Sat Dinner 4\n", "141 34.30 6.70 Male No Thur Lunch 6\n", "183 23.17 6.50 Male Yes Sun Dinner 4" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# DELETE FROM tips WHERE tip > 9;\n", "tips = tips.loc[tips[\"tip\"] <= 9]\n", "tips.sort_values(\"tip\", ascending=False).head()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 4 }