{ "cells": [ { "cell_type": "code", "execution_count": 119, "id": "e3e56e2b-d47e-43a2-9eb5-513a6063c09d", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "id": "c8d0e24a-0dbd-4fb7-820f-4e0cc964e341", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "source": [ "## Data Frames" ] }, { "cell_type": "code", "execution_count": 120, "id": "4a7f7a67-59dd-4008-8c48-6374c5decb3b", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDUsernameAgeJoined DateTotal PostsReputation
01bogdan_s18.02032-01-01150500
12jane_smith35.02032-02-15230720
23alex12325.02032-04-2580200
34bob5638.02032-06-21420940
45mark_wilsonNaN2032-09-15310500
\n", "
" ], "text/plain": [ " User ID Username Age Joined Date Total Posts Reputation\n", "0 1 bogdan_s 18.0 2032-01-01 150 500\n", "1 2 jane_smith 35.0 2032-02-15 230 720\n", "2 3 alex123 25.0 2032-04-25 80 200\n", "3 4 bob56 38.0 2032-06-21 420 940\n", "4 5 mark_wilson NaN 2032-09-15 310 500" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "forum_users = {\n", " 'User ID': np.array([1, 2, 3, 4, 5]),\n", " 'Username': ['bogdan_s', 'jane_smith', 'alex123', 'bob56', 'mark_wilson'],\n", " 'Age': [18, 35, 25, 38, None],\n", " 'Joined Date': pd.to_datetime(['2032-01-01', '2032-02-15', '2032-04-25', '2032-06-21', '2032-09-15']),\n", " 'Total Posts': [150, 230, 80, 420, 310],\n", " 'Reputation': [500, 720, 200, 940, 500]\n", "}\n", "\n", "df = pd.DataFrame(forum_users)\n", "df" ] }, { "cell_type": "code", "execution_count": 121, "id": "433c340e-cf75-4c05-a8e7-e9e518bfc19e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(5, 6)" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 122, "id": "b1841365-e8a1-4b5d-a07d-70250877dde0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.frame.DataFrame" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df)" ] }, { "cell_type": "code", "execution_count": 123, "id": "72f5fea6-2c02-457f-bcc2-ad97095d6958", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['User ID', 'Username', 'Age', 'Joined Date', 'Total Posts',\n", " 'Reputation'],\n", " dtype='object')" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 124, "id": "6ead64b1-240e-4892-ad2c-5d057453a728", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.indexes.base.Index" ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df.columns)" ] }, { "cell_type": "code", "execution_count": 125, "id": "7392fd77-964b-4c0f-97e2-51447d7a5fe8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['User ID', 'Username', 'Age', 'Joined Date', 'Total Posts', 'Reputation']" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns.tolist()" ] }, { "cell_type": "code", "execution_count": 126, "id": "b8c0b85b-e8f9-4e96-8193-cbb68bcd32f0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3, 4]" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.index.tolist()" ] }, { "cell_type": "code", "execution_count": 127, "id": "7d00b16e-53e1-4329-b45d-f636642804d3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "User ID int64\n", "Username object\n", "Age float64\n", "Joined Date datetime64[ns]\n", "Total Posts int64\n", "Reputation int64\n", "dtype: object" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": 128, "id": "cf15d198-7729-456c-86e5-1f9116b2f9d6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 'bogdan_s', 18.0, Timestamp('2032-01-01 00:00:00'), 150, 500],\n", " [2, 'jane_smith', 35.0, Timestamp('2032-02-15 00:00:00'), 230,\n", " 720],\n", " [3, 'alex123', 25.0, Timestamp('2032-04-25 00:00:00'), 80, 200],\n", " [4, 'bob56', 38.0, Timestamp('2032-06-21 00:00:00'), 420, 940],\n", " [5, 'mark_wilson', nan, Timestamp('2032-09-15 00:00:00'), 310,\n", " 500]], dtype=object)" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.values" ] }, { "cell_type": "code", "execution_count": 129, "id": "f5481626-06fc-40be-9884-60481939ddfb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "numpy.ndarray" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df.values)" ] }, { "cell_type": "code", "execution_count": 130, "id": "76e46bea-717c-40ca-9e96-81df57119e1e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'jane_smith'" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.values[1, 1]" ] }, { "cell_type": "code", "execution_count": 131, "id": "3f4f11cf-3ced-4859-b2de-d963d6ac6ec3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDUsernameAgeJoined DateTotal PostsReputation
01bogdan_s18.02032-01-01150500
12jane_smith35.02032-02-15230720
23alex12325.02032-04-2580200
\n", "
" ], "text/plain": [ " User ID Username Age Joined Date Total Posts Reputation\n", "0 1 bogdan_s 18.0 2032-01-01 150 500\n", "1 2 jane_smith 35.0 2032-02-15 230 720\n", "2 3 alex123 25.0 2032-04-25 80 200" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(3)" ] }, { "cell_type": "code", "execution_count": 132, "id": "47c79af0-399a-4b6a-95c0-382a10cd0cd4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDUsernameAgeJoined DateTotal PostsReputation
34bob5638.02032-06-21420940
45mark_wilsonNaN2032-09-15310500
\n", "
" ], "text/plain": [ " User ID Username Age Joined Date Total Posts Reputation\n", "3 4 bob56 38.0 2032-06-21 420 940\n", "4 5 mark_wilson NaN 2032-09-15 310 500" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail(2)" ] }, { "cell_type": "code", "execution_count": 133, "id": "3e519f5f-ef45-40b6-9d58-6720aee5caac", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDAgeJoined DateTotal PostsReputation
count5.004.0055.05.00
mean3.0029.002032-04-28 00:00:00238.0572.00
min1.0018.002032-01-01 00:00:0080.0200.00
25%2.0023.252032-02-15 00:00:00150.0500.00
50%3.0030.002032-04-25 00:00:00230.0500.00
75%4.0035.752032-06-21 00:00:00310.0720.00
max5.0038.002032-09-15 00:00:00420.0940.00
std1.589.20NaN133.3276.62
\n", "
" ], "text/plain": [ " User ID Age Joined Date Total Posts Reputation\n", "count 5.00 4.00 5 5.0 5.00\n", "mean 3.00 29.00 2032-04-28 00:00:00 238.0 572.00\n", "min 1.00 18.00 2032-01-01 00:00:00 80.0 200.00\n", "25% 2.00 23.25 2032-02-15 00:00:00 150.0 500.00\n", "50% 3.00 30.00 2032-04-25 00:00:00 230.0 500.00\n", "75% 4.00 35.75 2032-06-21 00:00:00 310.0 720.00\n", "max 5.00 38.00 2032-09-15 00:00:00 420.0 940.00\n", "std 1.58 9.20 NaN 133.3 276.62" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe().round(2)" ] }, { "cell_type": "code", "execution_count": 140, "id": "023b55d7-f83b-4f6a-9687-37aee4a577f6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeJoined Date
018.02032-01-01
135.02032-02-15
225.02032-04-25
338.02032-06-21
4NaN2032-09-15
\n", "
" ], "text/plain": [ " Age Joined Date\n", "0 18.0 2032-01-01\n", "1 35.0 2032-02-15\n", "2 25.0 2032-04-25\n", "3 38.0 2032-06-21\n", "4 NaN 2032-09-15" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select_dtypes(exclude=['object', 'int64'])" ] }, { "cell_type": "code", "execution_count": 135, "id": "210468be-60a9-410d-8c8f-c1573443d9f4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Username'], dtype='object')" ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select_dtypes(include='object').columns" ] }, { "cell_type": "code", "execution_count": 136, "id": "89b946d3-0a98-4b4a-9906-0e296cec1963", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDTotal PostsReputation
01150500
12230720
2380200
34420940
45310500
\n", "
" ], "text/plain": [ " User ID Total Posts Reputation\n", "0 1 150 500\n", "1 2 230 720\n", "2 3 80 200\n", "3 4 420 940\n", "4 5 310 500" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select_dtypes(include='int64')" ] }, { "cell_type": "code", "execution_count": 137, "id": "fe65dd76-bd28-4f13-962c-43e6cc803ecf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDUsernameAgeJoined DateTotal PostsReputation
0FalseFalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalseFalse
4FalseFalseTrueFalseFalseFalse
\n", "
" ], "text/plain": [ " User ID Username Age Joined Date Total Posts Reputation\n", "0 False False False False False False\n", "1 False False False False False False\n", "2 False False False False False False\n", "3 False False False False False False\n", "4 False False True False False False" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isna()" ] }, { "cell_type": "code", "execution_count": 142, "id": "4c60d120-82ed-4aee-924f-f2e1d3df5e72", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "User ID 0\n", "Username 0\n", "Age 1\n", "Joined Date 0\n", "Total Posts 0\n", "Reputation 0\n", "dtype: int64" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isna().sum()" ] }, { "cell_type": "markdown", "id": "abcdb997-4ac2-4353-affb-dfa1767784da", "metadata": {}, "source": [ "## Series" ] }, { "cell_type": "code", "execution_count": 146, "id": "41eb8eeb-e0a1-4d12-9591-7dd7e6f5a737", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 bogdan_s\n", "1 jane_smith\n", "2 alex123\n", "3 bob56\n", "4 mark_wilson\n", "Name: Username, dtype: object" ] }, "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ "username_series = df['Username']\n", "username_series" ] }, { "cell_type": "code", "execution_count": 147, "id": "2a476131-dccd-4886-9d2d-342db8affe5f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.series.Series" ] }, "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(username_series)" ] }, { "cell_type": "code", "execution_count": 148, "id": "0f829957-8aca-4b62-a664-2a95330570ff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['bogdan_s', 'jane_smith', 'alex123', 'bob56', 'mark_wilson'],\n", " dtype=object)" ] }, "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ "username_series.values" ] }, { "cell_type": "code", "execution_count": 149, "id": "40c36b4e-4c1c-4c66-b801-32b7d11e4429", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "numpy.ndarray" ] }, "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(username_series.values)" ] }, { "cell_type": "code", "execution_count": 150, "id": "e04c9d33-e362-45a6-acdd-c3139eb2e819", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RangeIndex(start=0, stop=5, step=1)" ] }, "execution_count": 150, "metadata": {}, "output_type": "execute_result" } ], "source": [ "username_series.index" ] }, { "cell_type": "code", "execution_count": 155, "id": "dcdf9cc3-602e-4187-aa7f-89572fe43b2e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Reputation\n", "500 2\n", "720 1\n", "200 1\n", "940 1\n", "Name: count, dtype: int64" ] }, "execution_count": 155, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Reputation'].value_counts()" ] }, { "cell_type": "code", "execution_count": 156, "id": "6dcfbaa9-ab64-4639-ba7d-4bf60cd2b9d5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Reputation\n", "720 1\n", "200 1\n", "940 1\n", "500 2\n", "Name: count, dtype: int64" ] }, "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Reputation'].value_counts(ascending=True)" ] }, { "cell_type": "code", "execution_count": 157, "id": "0f570a5e-7dce-4adc-8010-055e3f795e22", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([500, 720, 200, 940])" ] }, "execution_count": 157, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Reputation'].unique()" ] }, { "cell_type": "code", "execution_count": 159, "id": "91b78beb-7901-4596-b146-2991750d8fe4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4 mark_wilson\n", "1 jane_smith\n", "0 bogdan_s\n", "3 bob56\n", "2 alex123\n", "Name: Username, dtype: object" ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Username'].sort_values(ascending=False)" ] }, { "cell_type": "code", "execution_count": 160, "id": "4b8961bf-968e-4d0c-9513-8f07475ee7c6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2 alex123\n", "3 bob56\n", "0 bogdan_s\n", "1 jane_smith\n", "4 mark_wilson\n", "Name: Username, dtype: object" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Username'].sort_values()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3" } }, "nbformat": 4, "nbformat_minor": 5 }