f10
f10
# Address length in characters; a missing address counts as 0.
df_features['Address_Length'] = df_features['Address'].apply(
    lambda address: 0 if pd.isna(address) else len(str(address))
)
# Number of cuisines
def _count_cuisines(cuisines):
    """Return the number of non-blank, comma-separated entries in `cuisines`.

    Missing values (NaN/None) yield 0. Blank entries are skipped, so an
    empty string counts as 0 cuisines and a trailing comma does not add a
    phantom entry (``''.split(',')`` would otherwise report one item).
    """
    if pd.isna(cuisines):
        return 0
    return sum(1 for entry in str(cuisines).split(',') if entry.strip())


df_features['Cuisine_Count'] = df_features['Cuisines'].apply(_count_cuisines)
# Rating Category
def categorize_rating(rating):
    """Map a numeric aggregate rating to a descriptive label.

    Args:
        rating: Numeric rating value; may be NaN/None when missing.

    Returns:
        'Unknown' for a missing rating, otherwise one of 'Excellent'
        (>= 4.5), 'Very Good' (>= 4.0), 'Good' (>= 3.5), 'Average'
        (>= 3.0), 'Poor' (>= 2.0) or 'Very Poor' (anything lower).
    """
    # NaN compares False against every threshold, so without this guard a
    # missing rating would silently be labelled 'Very Poor'.
    if pd.isna(rating):
        return 'Unknown'
    if rating >= 4.5:
        return 'Excellent'
    if rating >= 4.0:
        return 'Very Good'
    if rating >= 3.5:
        return 'Good'
    if rating >= 3.0:
        return 'Average'
    if rating >= 2.0:
        return 'Poor'
    return 'Very Poor'
# The column key is the single string 'Aggregate rating'; a quoted string
# cannot span two physical lines, so the key is kept on one line.
df_features['Rating_Category'] = df_features['Aggregate rating'].apply(
    categorize_rating
)
# Cost Category
def categorize_cost(cost, currency):
    """Label a cost/currency pair, flagging missing inputs as 'Unknown'.

    Args:
        cost: Cost value for the row; may be NaN/None.
        currency: Currency value for the row; may be NaN/None.

    Returns:
        'Unknown' when either argument is missing, otherwise None.
    """
    if pd.isna(cost) or pd.isna(currency):
        return 'Unknown'
    # NOTE(review): no branch handles present values, so they fall through
    # and yield None. The cost-bucketing logic appears to be missing here --
    # confirm against the original notebook before relying on this column.
    return None
# Label every row by passing its cost and currency to categorize_cost.
df_features['Cost_Category'] = df_features.apply(
    lambda record: categorize_cost(
        record['Average Cost for two'],
        record['Currency'],
    ),
    axis=1,
)
# Box plot of aggregate rating for each cost category.
# NOTE(review): recent seaborn releases reportedly warn when `palette` is
# passed without `hue` -- confirm against the installed version.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(
    data=df_features,
    x='Cost_Category',
    y='Aggregate rating',
    palette='viridis',
    ax=ax,
)
ax.set_title('Rating Distribution by Cost Category', fontsize=16)
ax.set_xlabel('Cost Category', fontsize=14)
ax.set_ylabel('Aggregate Rating', fontsize=14)
ax.tick_params(axis='both', labelsize=12)
ax.grid(axis='y', alpha=0.3)  # horizontal guide lines only
plt.show()