Uber - Analysis - Jupyter - Notebook
Uber - Analysis - Jupyter - Notebook
In [3]: data.head(n=5)
#Preview of the first five rows of data
Out[3]:
Date/Time Lat Lon Base
#Derive the weekday and hour columns from the Date/Time column
#(get_weekday and get_hour are defined outside this excerpt)
timestamps=data['Date/Time']
data['weekday']=timestamps.map(get_weekday)
data['hour']=timestamps.map(get_hour)
localhost:8888/notebooks/Uber_Analysis.ipynb 1/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [8]: data.tail(n=5)
#Last five rows; the output now also lists the derived dom, weekday and hour columns
Out[8]:
Date/Time Lat Lon Base dom weekday hour
#NOTE(review): the hist(...) call that the next three comments describe is not visible in this excerpt
#A histogram uses 10 bins by default; the month has 30 days, so bins=30 gives one bar per day
#rwidth sets the relative width of each bar, which leaves a gap between neighbouring bars
#range=(0.5,30.5) with 30 bins centres every bin on a whole day number of the x-axis
plt.xlabel('Day Of Month')
plt.ylabel('Frequency')
plt.title('Uber Frequency details for Apl 14')
localhost:8888/notebooks/Uber_Analysis.ipynb 2/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [14]: for k,rows in data.groupby('dom'):
print((k,len(rows)))
#This returns the frequency in numbers for each day of the month
(1, 14546)
(2, 17474)
(3, 20701)
(4, 26714)
(5, 19521)
(6, 13445)
(7, 19550)
(8, 16188)
(9, 16843)
(10, 20041)
(11, 20420)
(12, 18170)
(13, 12112)
(14, 12674)
(15, 20641)
(16, 17717)
(17, 20973)
(18, 18074)
(19, 14602)
(20, 11017)
(21, 13162)
(22, 16975)
(23, 20346)
(24, 23352)
(25, 25095)
(26, 24925)
(27, 14677)
(28, 15475)
(29, 22835)
(30, 36251)
by_date=data.groupby('dom').size()
#Number of rows for each day of the month, indexed by dom
#size() is the built-in pandas group count, so no helper has to be applied to every group
localhost:8888/notebooks/Uber_Analysis.ipynb 3/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [17]: plt.plot(by_date)
#Line chart of the per-day counts held in by_date
In [19]: bar(by_date.index,by_date)
#Bar chart of the same per-day counts
#x positions are taken from the dom index of by_date instead of a hard-coded range(1,31),
#so positions and heights always have the same length
localhost:8888/notebooks/Uber_Analysis.ipynb 4/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [20]: by_date_sorted=by_date.sort_values(ascending=True)
by_date_sorted
#Per-day counts ordered from the smallest to the largest count
Out[20]: dom
20 11017
13 12112
14 12674
21 13162
6 13445
1 14546
19 14602
27 14677
28 15475
8 16188
9 16843
22 16975
2 17474
16 17717
18 18074
12 18170
5 19521
7 19550
10 20041
23 20346
11 20420
15 20641
3 20701
17 20973
29 22835
24 23352
26 24925
25 25095
4 26714
30 36251
dtype: int64
localhost:8888/notebooks/Uber_Analysis.ipynb 5/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [76]: plt.figure(figsize=(12,8))
#One bar position per entry of by_date_sorted instead of a hard-coded range(1,31)
positions=range(1,len(by_date_sorted)+1)
#Bars are drawn left to right in ascending order of count
plt.bar(positions,by_date_sorted)
#Label each bar with the day of month it belongs to (the index of by_date_sorted)
plt.xticks(positions,by_date_sorted.index)
plt.xlabel('Day Of Month')
plt.ylabel('Frequency')
#The ';' ending the last statement stops the notebook from echoing its return value as text
plt.title('Uber Frequency details for Apl 14');
localhost:8888/notebooks/Uber_Analysis.ipynb 6/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [83]: hist(data.weekday,bins=7,range=(-0.5,6.5),rwidth=0.8,color='#AA6666',alpha=0.4)
#Seven bins over (-0.5,6.5): one bin centred on each weekday code 0-6
#The ';' has to end the last statement; on a line of its own IPython evaluates it to ''
#and echoes that as the cell output
xticks(range(7),'Mon Tue Wed Thu Fri Sat Sun'.split());
Cross Analysis
localhost:8888/notebooks/Uber_Analysis.ipynb 7/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [98]: plt.figure(figsize=(12,8))
#cross is defined outside this excerpt; assumed to hold one row per weekday - TODO confirm
seaborn.heatmap(cross)
#Heatmap row i spans [i,i+1], so i+0.5 puts each label at the centre of its row
#instead of on the boundary between two rows
plt.yticks([i+0.5 for i in range(7)],'Mon Tue Wed Thu Fri Sat Sun'.split())
plt.xlabel('Hour',fontsize=20)
#The trailing ';' stops the notebook from echoing the return value of the last call
plt.ylabel('Weekday',fontsize=20);
localhost:8888/notebooks/Uber_Analysis.ipynb 8/10
9/14/2019 Uber_Analysis - Jupyter Notebook
Analysis by Latitude/Longitude
In [119]: hist(data['Lon'],bins=100,range=(-74.2,-73.7),color='y',alpha=0.5,label='Longitude')
legend(loc='upper left')
#twiny() adds a second x-axis that shares the y-axis, so latitude gets its own x scale
twiny()
hist(data['Lat'],bins=100,range=(40.50,41),color='b',alpha=0.5,label='Latitude')
grid()
#The ';' has to end the last statement; on a line of its own IPython evaluates it to ''
#and echoes that as the cell output
legend();
localhost:8888/notebooks/Uber_Analysis.ipynb 9/10
9/14/2019 Uber_Analysis - Jupyter Notebook
In [127]: plt.figure(figsize=(12,8))
#One small dot per row: longitude on x, latitude on y
plt.plot(data['Lon'],data['Lat'],'.',ms=1,alpha=0.5)
plt.grid()
plt.xlim(-74.2,-73.7)
#Lower limit first so latitude increases upward (north at the top);
#passing (41,40.50) inverted the axis and drew the map upside-down
#The trailing ';' stops the notebook from echoing the returned limits
plt.ylim(40.50,41);
In [ ]:
localhost:8888/notebooks/Uber_Analysis.ipynb 10/10