Amazon Sales Analysis Project - Jupyter Notebook
Amazon Sales Analysis Project - Jupyter Notebook
#import libraries
In [2]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Import dataset
In [5]:
# Load the raw Amazon sales export into a DataFrame.
# The default location is the original author's download folder; set the
# AMAZON_SALES_CSV environment variable to point at the file on another
# machine instead of editing this cell.
DATA_PATH = Path(
    os.environ.get(
        "AMAZON_SALES_CSV",
        r'C:\Users\SRINIVAS\Downloads\amazon_sales.csv - Sheet1.csv',
    )
)
df = pd.read_csv(DATA_PATH)
# Bare expression: the notebook renders a (truncated) view of the frame.
df
Out[5]:
ship-
Sales Courier
index Order ID Date Status Fulfilment service- Category Size Qty currency Amount ship-cit
Channel Status
level
405-
On the
0 0 8078784- 4/30/2022 Cancelled Merchant Amazon.in Standard T-shirt S 0 INR 647.62 MUMBA
Way
5731545
171- Shipped -
1 1 9198151- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt 3XL Shipped 1 INR 406.00 BENGALUR
1101146 to Buyer
404-
2 2 0687676- 4/30/2022 Shipped Amazon Amazon.in Expedited Shirt XL Shipped 1 INR 329.00 NAVI MUMBA
7273146
403-
On the
3 3 9615377- 4/30/2022 Cancelled Merchant Amazon.in Standard Blazzer L 0 INR 753.33 PUDUCHERR
Way
8133951
407-
4 4 1069790- 4/30/2022 Shipped Amazon Amazon.in Expedited Trousers 3XL Shipped 1 INR 574.00 CHENNA
7240320
... ... ... ... ... ... ... ... ... ... ... ... ... ... .
406-
128971 128970 6001380- 5/31/2022 Shipped Amazon Amazon.in Expedited Shirt XL Shipped 1 INR 517.00 HYDERABA
7673107
402-
128972 128971 9551604- 5/31/2022 Shipped Amazon Amazon.in Expedited T-shirt M Shipped 1 INR 999.00 GURUGRAM
7544318
407-
128973 128972 9547469- 5/31/2022 Shipped Amazon Amazon.in Expedited Blazzer XXL Shipped 1 INR 690.00 HYDERABA
3152358
402-
128974 128973 6184140- 5/31/2022 Shipped Amazon Amazon.in Expedited T-shirt XS Shipped 1 INR 1199.00 Halo
0545956
408-
128975 128974 7436540- 5/31/2022 Shipped Amazon Amazon.in Expedited T-shirt S Shipped 1 INR 696.00 Raipu
8728312
# Print column names, dtypes and non-null counts to see which columns have missing values.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128976 entries, 0 to 128975
Data columns (total 19 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 index 128976 non-null int64
1 Order ID 128976 non-null object
2 Date 128976 non-null object
3 Status 128976 non-null object
4 Fulfilment 128976 non-null object
5 Sales Channel 128976 non-null object
6 ship-service-level 128976 non-null object
7 Category 128976 non-null object
8 Size 128976 non-null object
9 Courier Status 128976 non-null object
10 Qty 128976 non-null int64
11 currency 121176 non-null object
12 Amount 121176 non-null float64
13 ship-city 128941 non-null object
14 ship-state 128941 non-null object
15 ship-postal-code 128941 non-null float64
16 ship-country 128941 non-null object
17 B2B 128976 non-null bool
18 fulfilled-by 39263 non-null object
dtypes: bool(1), float64(2), int64(2), object(14)
memory usage: 17.8+ MB
In [8]:
# Preview the first five rows of the frame.
df.head()
Out[8]:
ship-
Sales Courier
index Order ID Date Status Fulfilment service- Category Size Qty currency Amount ship-city
Channel Status
level
405-
On the
0 0 8078784- 4/30/2022 Cancelled Merchant Amazon.in Standard T-shirt S 0 INR 647.62 MUMBAI MAH
Way
5731545
171- Shipped -
1 1 9198151- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt 3XL Shipped 1 INR 406.00 BENGALURU
1101146 to Buyer
404-
2 2 0687676- 4/30/2022 Shipped Amazon Amazon.in Expedited Shirt XL Shipped 1 INR 329.00 NAVI MUMBAI MAH
7273146
403-
On the
3 3 9615377- 4/30/2022 Cancelled Merchant Amazon.in Standard Blazzer L 0 INR 753.33 PUDUCHERRY PU
Way
8133951
407-
4 4 1069790- 4/30/2022 Shipped Amazon Amazon.in Expedited Trousers 3XL Shipped 1 INR 574.00 CHENNAI T
7240320
In [9]:
# Preview the last five rows of the frame.
df.tail()
Out[9]:
ship-
Sales Courier
index Order ID Date Status Fulfilment service- Category Size Qty currency Amount ship-city
Channel Status
level
406-
128971 128970 6001380- 5/31/2022 Shipped Amazon Amazon.in Expedited Shirt XL Shipped 1 INR 517.0 HYDERABAD
7673107
402-
128972 128971 9551604- 5/31/2022 Shipped Amazon Amazon.in Expedited T-shirt M Shipped 1 INR 999.0 GURUGRAM
7544318
407-
128973 128972 9547469- 5/31/2022 Shipped Amazon Amazon.in Expedited Blazzer XXL Shipped 1 INR 690.0 HYDERABAD
3152358
402-
128974 128973 6184140- 5/31/2022 Shipped Amazon Amazon.in Expedited T-shirt XS Shipped 1 INR 1199.0 Halol
0545956
408-
128975 128974 7436540- 5/31/2022 Shipped Amazon Amazon.in Expedited T-shirt S Shipped 1 INR 696.0 Raipur
8728312
In [10]:
In [11]:
# Element-wise missing-value mask: True where a cell is null, same shape as df.
pd.isnull(df)
Out[11]:
ship- ship-
Order Sales Courier ship- ship-
index Date Status Fulfilment service- Category Size Qty currency Amount postal-
ID Channel Status city state co
level code
0 False False False False False False False False False False False False False False False False
1 False False False False False False False False False False False False False False False False
2 False False False False False False False False False False False False False False False False
3 False False False False False False False False False False False False False False False False
4 False False False False False False False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
128971 False False False False False False False False False False False False False False False False
128972 False False False False False False False False False False False False False False False False
128973 False False False False False False False False False False False False False False False False
128974 False False False False False False False False False False False False False False False False
128975 False False False False False False False False False False False False False False False False
In [12]:
Out[12]:
index 0
Order ID 0
Date 0
Status 0
Fulfilment 0
Sales Channel 0
ship-service-level 0
Category 0
Size 0
Courier Status 0
Qty 0
currency 7800
Amount 7800
ship-city 35
ship-state 35
ship-postal-code 35
ship-country 35
B2B 0
fulfilled-by 89713
dtype: int64
In [13]:
# Frame dimensions as (rows, columns).
df.shape
Out[13]:
(128976, 19)
In [15]:
In [16]:
# Re-check the dimensions as (rows, columns).
# NOTE(review): the row count drops between the previous df.shape and this one, but the
# cell that filters df is not visible in this export — presumably a dropna; confirm.
df.shape
Out[16]:
(37514, 19)
In [17]:
# Count the missing values in each column.
pd.isnull(df).sum()
Out[17]:
index 0
Order ID 0
Date 0
Status 0
Fulfilment 0
Sales Channel 0
ship-service-level 0
Category 0
Size 0
Courier Status 0
Qty 0
currency 0
Amount 0
ship-city 0
ship-state 0
ship-postal-code 0
ship-country 0
B2B 0
fulfilled-by 0
dtype: int64
In [19]:
# Re-inspect dtypes and non-null counts for the current state of df.
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 37514 entries, 0 to 128892
Data columns (total 19 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 index 37514 non-null int64
1 Order ID 37514 non-null object
2 Date 37514 non-null object
3 Status 37514 non-null object
4 Fulfilment 37514 non-null object
5 Sales Channel 37514 non-null object
6 ship-service-level 37514 non-null object
7 Category 37514 non-null object
8 Size 37514 non-null object
9 Courier Status 37514 non-null object
10 Qty 37514 non-null int64
11 currency 37514 non-null object
12 Amount 37514 non-null float64
13 ship-city 37514 non-null object
14 ship-state 37514 non-null object
15 ship-postal-code 37514 non-null float64
16 ship-country 37514 non-null object
17 B2B 37514 non-null bool
18 fulfilled-by 37514 non-null object
dtypes: bool(1), float64(2), int64(2), object(14)
memory usage: 5.5+ MB
In [20]:
# Preview the first five rows of the current df.
df.head()
Out[20]:
ship-
Sales Courier
index Order ID Date Status Fulfilment service- Category Size Qty currency Amount ship-city
Channel Status
level
405-
On the
0 0 8078784- 4/30/2022 Cancelled Merchant Amazon.in Standard T-shirt S 0 INR 647.62 MUMBAI MAH
Way
5731545
171- Shipped -
1 1 9198151- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt 3XL Shipped 1 INR 406.00 BENGALURU
1101146 to Buyer
403-
On the
3 3 9615377- 4/30/2022 Cancelled Merchant Amazon.in Standard Blazzer L 0 INR 753.33 PUDUCHERRY PU
Way
8133951
406- Shipped -
7 7 7807733- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt S Shipped 1 INR 399.00 HYDERABAD T
3785945 to Buyer
405- Shipped -
12 12 5513694- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt XS Shipped 1 INR 399.00 Amravati. MAH
8146768 to Buyer
In [21]:
In [31]:
Out[31]:
dtype('int32')
In [32]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
Out[32]:
In [33]:
Out[33]:
ship-
Sales Courier sh
Order ID Date Status Fulfilment service- Category Size currency ship-city ship-state
Channel Status coun
level
count 37514 37514 37514 37514 37514 37514 37514 37514 37514 37514 37514 37514 375
Shipped
171-
-
top 5057375- 4/25/2022 Merchant Amazon.in Standard T-shirt M Shipped INR BENGALURU MAHARASHTRA
Delivered
2831560
to Buyer
freq 12 697 28741 37514 37514 37514 14062 6806 31859 37514 2839 6236 375
In [34]:
Out[34]:
Qty Amount
# Preview the first five rows of the current df.
df.head()
Out[35]:
ship-
Sales Courier
index Order ID Date Status Fulfilment service- Category Size Qty currency Amount ship-city
Channel Status
level
405-
On the
0 0 8078784- 4/30/2022 Cancelled Merchant Amazon.in Standard T-shirt S 0 INR 647.62 MUMBAI MAH
Way
5731545
171- Shipped -
1 1 9198151- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt 3XL Shipped 1 INR 406.00 BENGALURU
1101146 to Buyer
403-
On the
3 3 9615377- 4/30/2022 Cancelled Merchant Amazon.in Standard Blazzer L 0 INR 753.33 PUDUCHERRY PU
Way
8133951
406- Shipped -
7 7 7807733- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt S Shipped 1 INR 399.00 HYDERABAD T
3785945 to Buyer
405- Shipped -
12 12 5513694- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt XS Shipped 1 INR 399.00 Amravati. MAH
8146768 to Buyer
In [40]:
# Number of rows per value of 'Size', drawn as a bar chart.
ax = sns.countplot(x='Size', data=df)
# Annotate every bar with its count; ax.containers holds one bar group per plotted series.
for bars in ax.containers:
    ax.bar_label(bars)
plt.show()
# Courier status: rows per 'Courier Status' value, with bars split by order 'Status'.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Courier Status', hue='Status', data=df, ax=ax)
plt.show()
# Preview the first five rows of the current df.
df.head()
Out[45]:
ship-
Sales Courier
index Order ID Date Status Fulfilment service- Category Size Qty currency Amount ship-city
Channel Status
level
405-
On the
0 0 8078784- 4/30/2022 Cancelled Merchant Amazon.in Standard T-shirt S 0 INR 647.62 MUMBAI MAH
Way
5731545
171- Shipped -
1 1 9198151- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt 3XL Shipped 1 INR 406.00 BENGALURU
1101146 to Buyer
403-
On the
3 3 9615377- 4/30/2022 Cancelled Merchant Amazon.in Standard Blazzer L 0 INR 753.33 PUDUCHERRY PU
Way
8133951
406- Shipped -
7 7 7807733- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt S Shipped 1 INR 399.00 HYDERABAD T
3785945 to Buyer
405- Shipped -
12 12 5513694- 4/30/2022 Delivered Merchant Amazon.in Standard Shirt XS Shipped 1 INR 399.00 Amravati. MAH
8146768 to Buyer
In [46]:
# Re-inspect dtypes and non-null counts for the current state of df.
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 37514 entries, 0 to 128892
Data columns (total 19 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 index 37514 non-null int64
1 Order ID 37514 non-null object
2 Date 37514 non-null object
3 Status 37514 non-null object
4 Fulfilment 37514 non-null object
5 Sales Channel 37514 non-null object
6 ship-service-level 37514 non-null object
7 Category 37514 non-null object
8 Size 37514 non-null object
9 Courier Status 37514 non-null object
10 Qty 37514 non-null int64
11 currency 37514 non-null object
12 Amount 37514 non-null float64
13 ship-city 37514 non-null object
14 ship-state 37514 non-null object
15 ship-postal-code 37514 non-null int32
16 ship-country 37514 non-null object
17 B2B 37514 non-null bool
18 fulfilled-by 37514 non-null object
dtypes: bool(1), float64(1), int32(1), int64(2), object(14)
memory usage: 5.3+ MB
In [51]:
# Distribution of rows across product categories.
# 'Category' is a string-valued column, so the per-category counts are drawn as
# a bar chart. A 30-bin numeric histogram over category positions leaves the
# bars misaligned with the category tick labels.
c_d = df['Category']
category_counts = c_d.value_counts()  # sorted from most to least frequent

plt.figure(figsize=(10, 6))
plt.bar(category_counts.index, category_counts.values, edgecolor="red", color="yellow")
plt.xticks(rotation=90)  # category names are long; rotate so they do not overlap
plt.xlabel('Category')
plt.ylabel('Count')
plt.title('Rows per Category')
plt.show()
# Scatter plot of Category against Size.
# Both columns are categorical, so each marker shows a (Category, Size)
# combination that occurs in df; repeated combinations are drawn on top of
# each other.
x_data = df['Category']
y_data = df['Size']

fig, ax = plt.subplots()
ax.scatter(x_data, y_data)
ax.set_xlabel('Category')
ax.set_ylabel('Size')
ax.set_title('Available Size')  # fixed typo: was 'Avalilable Size'
plt.show()
In [69]: