Pandas PD: Import As
import pandas as pd
[3]: df=pd.read_csv("titanic_dataset.csv")
[5]: df.head()
[6]: df.head(8)
1
7 8 0 3
[7]: df.tail()
[8]: df.tail(10)
2
887 888 1 1 Graham, Miss. Margaret Edith
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie"
889 890 1 1 Behr, Mr. Karl Howell
890 891 0 3 Dooley, Mr. Patrick
[9]: df.shape
[7]: df2=pd.read_csv("iris_dataset.csv")
df2
[7]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
.. ... ... ... ...
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
target
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
3
148 Iris-virginica
149 Iris-virginica
[12]: df2.head(10)
[12]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
5 5.4 3.9 1.7 0.4
6 4.6 3.4 1.4 0.3
7 5.0 3.4 1.5 0.2
8 4.4 2.9 1.4 0.2
9 4.9 3.1 1.5 0.1
target
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
5 Iris-setosa
6 Iris-setosa
7 Iris-setosa
8 Iris-setosa
9 Iris-setosa
[13]: df2.shape
[13]: (150, 5)
[14]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Name 891 non-null object
4 Sex 891 non-null object
4
5 Age 714 non-null float64
6 SibSp 891 non-null int64
7 Parch 891 non-null int64
8 Ticket 891 non-null object
9 Fare 891 non-null float64
10 Cabin 204 non-null object
11 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
[16]: df2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sepal length (cm) 150 non-null float64
1 sepal width (cm) 150 non-null float64
2 petal length (cm) 150 non-null float64
3 petal width (cm) 150 non-null float64
4 target 150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
[17]: df.describe()
Parch Fare
count 891.000000 891.000000
mean 0.381594 32.204208
std 0.806057 49.693429
min 0.000000 0.000000
25% 0.000000 7.910400
50% 0.000000 14.454200
75% 0.000000 31.000000
max 6.000000 512.329200
5
[18]: df2.describe()
[18]: sepal length (cm) sepal width (cm) petal length (cm) \
count 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667
std 0.828066 0.433594 1.764420
min 4.300000 2.000000 1.000000
25% 5.100000 2.800000 1.600000
50% 5.800000 3.000000 4.350000
75% 6.400000 3.300000 5.100000
max 7.900000 4.400000 6.900000
[19]: df.isnull()
[19]: PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket \
0 False False False False False False False False False
1 False False False False False False False False False
2 False False False False False False False False False
3 False False False False False False False False False
4 False False False False False False False False False
.. ... ... ... ... ... ... ... ... ...
886 False False False False False False False False False
887 False False False False False False False False False
888 False False False False False True False False False
889 False False False False False False False False False
890 False False False False False False False False False
6
889 False False False
890 False True False
[20]: df.isnull().sum()
[20]: PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
[21]: df2.isnull().sum()
[22]: df.drop_duplicates()
7
1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1
2 Heikkinen, Miss. Laina female 26.0 0
3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1
4 Allen, Mr. William Henry male 35.0 0
.. ... ... ... ...
886 Montvila, Rev. Juozas male 27.0 0
887 Graham, Miss. Margaret Edith female 19.0 0
888 Johnston, Miss. Catherine Helen "Carrie" female NaN 1
889 Behr, Mr. Karl Howell male 26.0 0
890 Dooley, Mr. Patrick male 32.0 0
[23]: df2.drop_duplicates()
[23]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
.. ... ... ... ...
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
target
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
8
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
[24]: df.value_counts('Sex')
[24]: Sex
male 577
female 314
dtype: int64
[26]: df2.value_counts('target')
[26]: target
Iris-setosa 50
Iris-versicolor 50
Iris-virginica 50
dtype: int64
9
7.2 False
5.9 False
4.4 False
6.8 False
4.7 False
6.6 False
7.4 False
7.6 False
7.3 False
4.3 False
7.1 False
7.0 False
5.3 False
4.5 False
7.9 False
dtype: bool
[28]: df.value_counts('Age')>18
[28]: Age
24.00 True
22.00 True
18.00 True
30.00 True
28.00 True
...
20.50 False
14.50 False
12.00 False
0.92 False
80.00 False
Length: 88, dtype: bool
[29]: df.sample()
[10]: df2.sample()
[10]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
80 5.5 2.4 3.8 1.1
target
10
80 Iris-versicolor
[30]: df.sample(5)
[31]: df.sample(axis=1)
[31]: Fare
0 7.2500
1 71.2833
2 7.9250
3 53.1000
4 8.0500
.. ...
886 13.0000
887 30.0000
888 23.4500
889 30.0000
890 7.7500
[32]: df.nlargest(5,'Age')
11
96 male 71.0 0 0 PC 17754 34.6542 A5 C
493 male 71.0 0 0 PC 17609 49.5042 NaN C
116 male 70.5 0 0 370369 7.7500 NaN Q
[9]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
131 7.9 3.8 6.4 2.0
117 7.7 3.8 6.7 2.2
118 7.7 2.6 6.9 2.3
122 7.7 2.8 6.7 2.0
135 7.7 3.0 6.1 2.3
target
131 Iris-virginica
117 Iris-virginica
118 Iris-virginica
122 Iris-virginica
135 Iris-virginica
[33]: df.nsmallest(5,'Age')
[8]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
13 4.3 3.0 1.1 0.1
8 4.4 2.9 1.4 0.2
38 4.4 3.0 1.3 0.2
42 4.4 3.2 1.3 0.2
41 4.5 2.3 1.3 0.3
target
13 Iris-setosa
12
8 Iris-setosa
38 Iris-setosa
42 Iris-setosa
41 Iris-setosa
[34]: df.loc[[0,1,2]]
[35]: df.loc[[3,4,5],['PassengerId','Survived','Name']]
13
873 874 0 3
879 880 1 1
885 886 0 3
14
Name Sex Age SibSp \
1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1
3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1
11 Bonnell, Miss. Elizabeth female 58.0 0
15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0
21 Beesley, Mr. Lawrence male 34.0 0
.. ... ... ... ...
857 Daly, Mr. Peter Denis male 51.0 0
862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0
865 Bystrom, Mrs. (Karolina) female 42.0 0
871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1
879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0
15
[5]: PassengerId Survived
0 1 0
2 3 1
[40]: df.dropna()
16
871 1 11751 52.5542 D35 S
872 0 695 5.0000 B51 B53 B55 S
879 1 11767 83.1583 C50 C
887 0 112053 30.0000 B42 S
889 0 111369 30.0000 C148 C
[41]: df.isna().any()
[13]: df2.isnull()
[13]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 False False False False
1 False False False False
2 False False False False
3 False False False False
4 False False False False
.. ... ... ... ...
145 False False False False
146 False False False False
147 False False False False
148 False False False False
149 False False False False
target
0 False
1 False
2 False
3 False
4 False
.. ...
145 False
17
146 False
147 False
148 False
149 False
[14]: df2.isnull().sum()
[15]: df2.tail()
[15]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
target
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
[12]: df2.dropna()
[12]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
.. ... ... ... ...
145 6.7 3.0 5.2 2.3
146 6.3 2.5 5.0 1.9
147 6.5 3.0 5.2 2.0
148 6.2 3.4 5.4 2.3
149 5.9 3.0 5.1 1.8
18
target
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
.. ...
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
19