{
"cells": [
{
"cell_type": "code",
"execution_count": 119,
"id": "e3e56e2b-d47e-43a2-9eb5-513a6063c09d",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "c8d0e24a-0dbd-4fb7-820f-4e0cc964e341",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"## Data Frames"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "4a7f7a67-59dd-4008-8c48-6374c5decb3b",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" User ID | \n",
" Username | \n",
" Age | \n",
" Joined Date | \n",
" Total Posts | \n",
" Reputation | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" bogdan_s | \n",
" 18.0 | \n",
" 2032-01-01 | \n",
" 150 | \n",
" 500 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" jane_smith | \n",
" 35.0 | \n",
" 2032-02-15 | \n",
" 230 | \n",
" 720 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" alex123 | \n",
" 25.0 | \n",
" 2032-04-25 | \n",
" 80 | \n",
" 200 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" bob56 | \n",
" 38.0 | \n",
" 2032-06-21 | \n",
" 420 | \n",
" 940 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" mark_wilson | \n",
" NaN | \n",
" 2032-09-15 | \n",
" 310 | \n",
" 500 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" User ID Username Age Joined Date Total Posts Reputation\n",
"0 1 bogdan_s 18.0 2032-01-01 150 500\n",
"1 2 jane_smith 35.0 2032-02-15 230 720\n",
"2 3 alex123 25.0 2032-04-25 80 200\n",
"3 4 bob56 38.0 2032-06-21 420 940\n",
"4 5 mark_wilson NaN 2032-09-15 310 500"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"forum_users = {\n",
" 'User ID': np.array([1, 2, 3, 4, 5]),\n",
" 'Username': ['bogdan_s', 'jane_smith', 'alex123', 'bob56', 'mark_wilson'],\n",
" 'Age': [18, 35, 25, 38, None],\n",
" 'Joined Date': pd.to_datetime(['2032-01-01', '2032-02-15', '2032-04-25', '2032-06-21', '2032-09-15']),\n",
" 'Total Posts': [150, 230, 80, 420, 310],\n",
" 'Reputation': [500, 720, 200, 940, 500]\n",
"}\n",
"\n",
"df = pd.DataFrame(forum_users)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "433c340e-cf75-4c05-a8e7-e9e518bfc19e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(5, 6)"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 122,
"id": "b1841365-e8a1-4b5d-a07d-70250877dde0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.frame.DataFrame"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df)"
]
},
{
"cell_type": "code",
"execution_count": 123,
"id": "72f5fea6-2c02-457f-bcc2-ad97095d6958",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['User ID', 'Username', 'Age', 'Joined Date', 'Total Posts',\n",
" 'Reputation'],\n",
" dtype='object')"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 124,
"id": "6ead64b1-240e-4892-ad2c-5d057453a728",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.indexes.base.Index"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df.columns)"
]
},
{
"cell_type": "code",
"execution_count": 125,
"id": "7392fd77-964b-4c0f-97e2-51447d7a5fe8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['User ID', 'Username', 'Age', 'Joined Date', 'Total Posts', 'Reputation']"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 126,
"id": "b8c0b85b-e8f9-4e96-8193-cbb68bcd32f0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2, 3, 4]"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "7d00b16e-53e1-4329-b45d-f636642804d3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"User ID int64\n",
"Username object\n",
"Age float64\n",
"Joined Date datetime64[ns]\n",
"Total Posts int64\n",
"Reputation int64\n",
"dtype: object"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 128,
"id": "cf15d198-7729-456c-86e5-1f9116b2f9d6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[1, 'bogdan_s', 18.0, Timestamp('2032-01-01 00:00:00'), 150, 500],\n",
" [2, 'jane_smith', 35.0, Timestamp('2032-02-15 00:00:00'), 230,\n",
" 720],\n",
" [3, 'alex123', 25.0, Timestamp('2032-04-25 00:00:00'), 80, 200],\n",
" [4, 'bob56', 38.0, Timestamp('2032-06-21 00:00:00'), 420, 940],\n",
" [5, 'mark_wilson', nan, Timestamp('2032-09-15 00:00:00'), 310,\n",
" 500]], dtype=object)"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.values"
]
},
{
"cell_type": "code",
"execution_count": 129,
"id": "f5481626-06fc-40be-9884-60481939ddfb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"numpy.ndarray"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df.values)"
]
},
{
"cell_type": "code",
"execution_count": 130,
"id": "76e46bea-717c-40ca-9e96-81df57119e1e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'jane_smith'"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.values[1, 1]"
]
},
{
"cell_type": "code",
"execution_count": 131,
"id": "3f4f11cf-3ced-4859-b2de-d963d6ac6ec3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" User ID | \n",
" Username | \n",
" Age | \n",
" Joined Date | \n",
" Total Posts | \n",
" Reputation | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" bogdan_s | \n",
" 18.0 | \n",
" 2032-01-01 | \n",
" 150 | \n",
" 500 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" jane_smith | \n",
" 35.0 | \n",
" 2032-02-15 | \n",
" 230 | \n",
" 720 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" alex123 | \n",
" 25.0 | \n",
" 2032-04-25 | \n",
" 80 | \n",
" 200 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" User ID Username Age Joined Date Total Posts Reputation\n",
"0 1 bogdan_s 18.0 2032-01-01 150 500\n",
"1 2 jane_smith 35.0 2032-02-15 230 720\n",
"2 3 alex123 25.0 2032-04-25 80 200"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 132,
"id": "47c79af0-399a-4b6a-95c0-382a10cd0cd4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" User ID | \n",
" Username | \n",
" Age | \n",
" Joined Date | \n",
" Total Posts | \n",
" Reputation | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" 4 | \n",
" bob56 | \n",
" 38.0 | \n",
" 2032-06-21 | \n",
" 420 | \n",
" 940 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" mark_wilson | \n",
" NaN | \n",
" 2032-09-15 | \n",
" 310 | \n",
" 500 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" User ID Username Age Joined Date Total Posts Reputation\n",
"3 4 bob56 38.0 2032-06-21 420 940\n",
"4 5 mark_wilson NaN 2032-09-15 310 500"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail(2)"
]
},
{
"cell_type": "code",
"execution_count": 133,
"id": "3e519f5f-ef45-40b6-9d58-6720aee5caac",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" User ID | \n",
" Age | \n",
" Joined Date | \n",
" Total Posts | \n",
" Reputation | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 5.00 | \n",
" 4.00 | \n",
" 5 | \n",
" 5.0 | \n",
" 5.00 | \n",
"
\n",
" \n",
" mean | \n",
" 3.00 | \n",
" 29.00 | \n",
" 2032-04-28 00:00:00 | \n",
" 238.0 | \n",
" 572.00 | \n",
"
\n",
" \n",
" min | \n",
" 1.00 | \n",
" 18.00 | \n",
" 2032-01-01 00:00:00 | \n",
" 80.0 | \n",
" 200.00 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.00 | \n",
" 23.25 | \n",
" 2032-02-15 00:00:00 | \n",
" 150.0 | \n",
" 500.00 | \n",
"
\n",
" \n",
" 50% | \n",
" 3.00 | \n",
" 30.00 | \n",
" 2032-04-25 00:00:00 | \n",
" 230.0 | \n",
" 500.00 | \n",
"
\n",
" \n",
" 75% | \n",
" 4.00 | \n",
" 35.75 | \n",
" 2032-06-21 00:00:00 | \n",
" 310.0 | \n",
" 720.00 | \n",
"
\n",
" \n",
" max | \n",
" 5.00 | \n",
" 38.00 | \n",
" 2032-09-15 00:00:00 | \n",
" 420.0 | \n",
" 940.00 | \n",
"
\n",
" \n",
" std | \n",
" 1.58 | \n",
" 9.20 | \n",
" NaN | \n",
" 133.3 | \n",
" 276.62 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" User ID Age Joined Date Total Posts Reputation\n",
"count 5.00 4.00 5 5.0 5.00\n",
"mean 3.00 29.00 2032-04-28 00:00:00 238.0 572.00\n",
"min 1.00 18.00 2032-01-01 00:00:00 80.0 200.00\n",
"25% 2.00 23.25 2032-02-15 00:00:00 150.0 500.00\n",
"50% 3.00 30.00 2032-04-25 00:00:00 230.0 500.00\n",
"75% 4.00 35.75 2032-06-21 00:00:00 310.0 720.00\n",
"max 5.00 38.00 2032-09-15 00:00:00 420.0 940.00\n",
"std 1.58 9.20 NaN 133.3 276.62"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe().round(2)"
]
},
{
"cell_type": "code",
"execution_count": 140,
"id": "023b55d7-f83b-4f6a-9687-37aee4a577f6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Age | \n",
" Joined Date | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 18.0 | \n",
" 2032-01-01 | \n",
"
\n",
" \n",
" 1 | \n",
" 35.0 | \n",
" 2032-02-15 | \n",
"
\n",
" \n",
" 2 | \n",
" 25.0 | \n",
" 2032-04-25 | \n",
"
\n",
" \n",
" 3 | \n",
" 38.0 | \n",
" 2032-06-21 | \n",
"
\n",
" \n",
" 4 | \n",
" NaN | \n",
" 2032-09-15 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Age Joined Date\n",
"0 18.0 2032-01-01\n",
"1 35.0 2032-02-15\n",
"2 25.0 2032-04-25\n",
"3 38.0 2032-06-21\n",
"4 NaN 2032-09-15"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.select_dtypes(exclude=['object', 'int64'])"
]
},
{
"cell_type": "code",
"execution_count": 135,
"id": "210468be-60a9-410d-8c8f-c1573443d9f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Username'], dtype='object')"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.select_dtypes(include='object').columns"
]
},
{
"cell_type": "code",
"execution_count": 136,
"id": "89b946d3-0a98-4b4a-9906-0e296cec1963",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" User ID | \n",
" Total Posts | \n",
" Reputation | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 150 | \n",
" 500 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 230 | \n",
" 720 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 80 | \n",
" 200 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 420 | \n",
" 940 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 310 | \n",
" 500 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" User ID Total Posts Reputation\n",
"0 1 150 500\n",
"1 2 230 720\n",
"2 3 80 200\n",
"3 4 420 940\n",
"4 5 310 500"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.select_dtypes(include='int64')"
]
},
{
"cell_type": "code",
"execution_count": 137,
"id": "fe65dd76-bd28-4f13-962c-43e6cc803ecf",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" User ID | \n",
" Username | \n",
" Age | \n",
" Joined Date | \n",
" Total Posts | \n",
" Reputation | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" User ID Username Age Joined Date Total Posts Reputation\n",
"0 False False False False False False\n",
"1 False False False False False False\n",
"2 False False False False False False\n",
"3 False False False False False False\n",
"4 False False True False False False"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna()"
]
},
{
"cell_type": "code",
"execution_count": 142,
"id": "4c60d120-82ed-4aee-924f-f2e1d3df5e72",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"User ID 0\n",
"Username 0\n",
"Age 1\n",
"Joined Date 0\n",
"Total Posts 0\n",
"Reputation 0\n",
"dtype: int64"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum()"
]
},
{
"cell_type": "markdown",
"id": "abcdb997-4ac2-4353-affb-dfa1767784da",
"metadata": {},
"source": [
"## Series"
]
},
{
"cell_type": "code",
"execution_count": 146,
"id": "41eb8eeb-e0a1-4d12-9591-7dd7e6f5a737",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 bogdan_s\n",
"1 jane_smith\n",
"2 alex123\n",
"3 bob56\n",
"4 mark_wilson\n",
"Name: Username, dtype: object"
]
},
"execution_count": 146,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"username_series = df['Username']\n",
"username_series"
]
},
{
"cell_type": "code",
"execution_count": 147,
"id": "2a476131-dccd-4886-9d2d-342db8affe5f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(username_series)"
]
},
{
"cell_type": "code",
"execution_count": 148,
"id": "0f829957-8aca-4b62-a664-2a95330570ff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['bogdan_s', 'jane_smith', 'alex123', 'bob56', 'mark_wilson'],\n",
" dtype=object)"
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"username_series.values"
]
},
{
"cell_type": "code",
"execution_count": 149,
"id": "40c36b4e-4c1c-4c66-b801-32b7d11e4429",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"numpy.ndarray"
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(username_series.values)"
]
},
{
"cell_type": "code",
"execution_count": 150,
"id": "e04c9d33-e362-45a6-acdd-c3139eb2e819",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=5, step=1)"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"username_series.index"
]
},
{
"cell_type": "code",
"execution_count": 155,
"id": "dcdf9cc3-602e-4187-aa7f-89572fe43b2e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Reputation\n",
"500 2\n",
"720 1\n",
"200 1\n",
"940 1\n",
"Name: count, dtype: int64"
]
},
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Reputation'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 156,
"id": "6dcfbaa9-ab64-4639-ba7d-4bf60cd2b9d5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Reputation\n",
"720 1\n",
"200 1\n",
"940 1\n",
"500 2\n",
"Name: count, dtype: int64"
]
},
"execution_count": 156,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Reputation'].value_counts(ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": 157,
"id": "0f570a5e-7dce-4adc-8010-055e3f795e22",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([500, 720, 200, 940])"
]
},
"execution_count": 157,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Reputation'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 159,
"id": "91b78beb-7901-4596-b146-2991750d8fe4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4 mark_wilson\n",
"1 jane_smith\n",
"0 bogdan_s\n",
"3 bob56\n",
"2 alex123\n",
"Name: Username, dtype: object"
]
},
"execution_count": 159,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Username'].sort_values(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 160,
"id": "4b8961bf-968e-4d0c-9513-8f07475ee7c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2 alex123\n",
"3 bob56\n",
"0 bogdan_s\n",
"1 jane_smith\n",
"4 mark_wilson\n",
"Name: Username, dtype: object"
]
},
"execution_count": 160,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Username'].sort_values()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}