TCS EDA Question
TCS EDA Question
from decimal import Decimal

import numpy as np
import pandas as pd
# Load the records and derive the date helpers used by the questions below.
df = pd.read_csv("data.csv")

# Parse the 'Day' column once; it feeds both derived columns.
parsed_days = pd.to_datetime(df["Day"])
df["Date"] = parsed_days
df["month_index"] = parsed_days.dt.month  # calendar month number (January = 1)

# Print a column/dtype summary of the frame.
df.info()
### What is the standard deviation of maximum windspeed across all the days
# NOTE(review): `ws_std` is not assigned anywhere in the visible source, so this
# print raises NameError unless it is defined elsewhere — the cell that computes
# the standard deviation appears to be missing.
print('ws_std =',ws_std)
### What is the difference between 50th percentile and 75th percentile of average temperature
# NOTE(review): column 1 is assumed to hold average temperature (it is the column
# the other average-temperature cells in this file read) — confirm against data.csv.
avg_temp_values = df.iloc[:, 1]

# 75th minus 50th percentile. The original cell's scratch comment recorded
# 12.200000000000003, presumably this same difference.
q = avg_temp_values.quantile(0.75) - avg_temp_values.quantile(0.50)

# Decimal rounding keeps two fixed decimal places in the printed answer
# (e.g. 12.20 rather than 12.2).
p_range = round(Decimal(q), 2)
print("p_range =", p_range)
### What is the Pearson correlation between average dew point and average temperature
# Build the 2x2 Pearson correlation matrix for columns 1 and 3, then read the
# off-diagonal entry (row 0, column 1), rounded to two decimals.
pearson_matrix = df.iloc[:, [1, 3]].corr(method='pearson')
corr = round(pearson_matrix.iloc[0, 1], 2)
print('corr =', corr)
### Out of all the available records, which month has the lowest average humidity?
- Assign your answer as the month index; for example, if it is July, the index is 7.
# Look up the 'month_index' value of the row(s) labelled `k` and report it.
# NOTE(review): `k` is not assigned anywhere in the visible source, so this raises
# NameError unless it is defined elsewhere. Presumably it should be the index label
# of the record/month with the lowest average humidity — confirm how it was derived.
dew_month = df.loc[k]["month_index"]
print('dew_month =',dew_month)
### Which month has the highest median for maximum_gust_speed out of all the available records?
Also find the respective value.
# Group the records by month index. No column is selected or aggregated here, so
# this binds the groupby object itself rather than a per-month statistic.
max_gust_month = df.groupby(['month_index'])
# NOTE(review): `max_gust_median_month` is not assigned anywhere in the visible
# source, and the groupby object bound on the previous line is overwritten here
# without being used. Presumably a per-month median series was meant to be
# computed in between — confirm.
max_gust_month = max_gust_median_month.idxmax()
#max_gust_value
print('max_gust_month =',max_gust_month)
# NOTE(review): the reported value is a hard-coded literal, not computed from df.
print('max_gust_value =',Decimal('34.50'))
### Determine the average temperature between March 2010 and May 2012 (including
both months)
# Average temperature over March 2010 through May 2012, both months included.
# NOTE(review): column 1 is assumed to hold average temperature (it is the column
# the other average-temperature cells in this file read) — confirm against data.csv.
period_start = pd.Timestamp('2010-03-01')
period_end = pd.Timestamp('2012-06-01')  # exclusive bound, so all of May 2012 is kept
in_period = (df['Date'] >= period_start) & (df['Date'] < period_end)

# `one` holds the average-temperature readings that fall inside the period.
one = df.loc[in_period].iloc[:, 1]

# Series.mean() yields NaN for an empty selection instead of dividing by zero.
avg_temp = round(one.mean(), 2)
print('avg_temp =', avg_temp)

# NOTE(review): temp_range is still a hard-coded literal, not computed from df;
# the visible source does not show how it was derived.
print('temp_range =',Decimal('44.80'))
### Out of all available records, which day has the largest difference between maximum_pressure
and minimum_pressure?
- Assign the date in string format as 'yyyy-mm-dd'. Make sure you enclose it in single quotes.
# Find the row label where 'pressure_diff' is largest and format that row's date.
# NOTE(review): the 'pressure_diff' column is not created in the visible source —
# presumably maximum minus minimum pressure, computed in a missing cell; confirm.
max_press_diff = df['pressure_diff'].idxmax()

# Round-trip the date through str so the result is a Timestamp whatever type the
# 'Date' cell holds.
max_press_date = pd.to_datetime(str(df['Date'][max_press_diff]))

# Render as 'yyyy-mm-dd'.
max_p_range = max_press_date.strftime('%Y-%m-%d')
### How many days fall exactly on the median value of the barometer reading?
# NOTE(review): column 4 is taken to be the barometer reading, per the question
# text — confirm against data.csv.
barometer = df.iloc[:, 4]
med = barometer.median()

# Keep only the rows whose reading equals the median exactly, then count them.
da = df[barometer == med]
median_b_days = len(da)
print('median_b_days =', median_b_days)
### Out of all the available records, how many days are within one standard deviation of the average
temperature?
# Count the days whose average temperature lies within one standard deviation
# of the mean average temperature.
# NOTE(review): column 1 is assumed to hold average temperature, per the question
# text — confirm against data.csv.
avg_temp_values = df.iloc[:, 1]
std = round(avg_temp_values.std(), 2)
mean = round(avg_temp_values.mean(), 2)

# between() is inclusive at both ends, so readings exactly one standard deviation
# from the mean are counted; summing the boolean mask gives the number of days.
num_days_std = int(avg_temp_values.between(mean - std, mean + std).sum())
print('num_days_std =', num_days_std)
## Once you are done with your solution make sure you have saved the notebook (ctrl + s)