0% found this document useful (0 votes)
4 views3 pages

f12

The document outlines a customer preference analysis focusing on the relationship between cuisine types and restaurant ratings. It includes steps to analyze average ratings, popularity based on votes, and the percentage of high-rated restaurants for various cuisines, along with visualizations for better understanding. The analysis aims to identify top cuisines by rating, votes, and high rating percentages while exploring the relationship between popularity and ratings.

Uploaded by

sambhaviasingh
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
4 views3 pages

f12

The document outlines a customer preference analysis focusing on the relationship between cuisine types and restaurant ratings. It includes steps to analyze average ratings, popularity based on votes, and the percentage of high-rated restaurants for various cuisines, along with visualizations for better understanding. The analysis aims to identify top cuisines by rating, votes, and high rating percentages while exploring the relationship between popularity and ratings.

Uploaded by

sambhaviasingh
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 3

# LEVEL 3 - TASK 2: CUSTOMER PREFERENCE ANALYSIS
#
# This section reads `df_processed` (columns 'Cuisines', 'Aggregate rating'
# and 'Votes') and uses the `pd` alias; both must already be in scope.

print("LEVEL 3 - TASK 2: CUSTOMER PREFERENCE ANALYSIS")
print("============================================")

# 1. Analyze the relationship between the type of cuisine and the restaurant's rating
# First, we need to create a dataframe with cuisine-level data: one record per
# (restaurant row, cuisine) pair, carrying that row's rating and vote count.
cuisine_data = []

# Walk the three needed columns in lockstep instead of iterrows(), which
# builds a full Series for every row just to read three values from it.
for cuisines_text, rating, votes in zip(
    df_processed['Cuisines'],
    df_processed['Aggregate rating'],
    df_processed['Votes'],
):
    # Rows without any cuisine information contribute nothing.
    if pd.isna(cuisines_text):
        continue

    # 'Cuisines' is a comma-separated list; strip the padding around each name.
    for cuisine in cuisines_text.split(','):
        cuisine_data.append({
            'Cuisine': cuisine.strip(),
            'Rating': rating,
            'Votes': votes,
        })

# Explicit columns keep the expected schema even when no record was collected,
# so later groupby('Cuisine') calls do not fail on an empty, column-less frame.
cuisine_df = pd.DataFrame(cuisine_data, columns=['Cuisine', 'Rating', 'Votes'])

# Summarise every cuisine: mean rating, summed votes and number of rows
# (the grouping column is counted and then renamed to 'Count').
cuisine_ratings = (
    cuisine_df
    .groupby('Cuisine')
    .agg({
        'Rating': 'mean',
        'Votes': 'sum',
        'Cuisine': 'count',
    })
    .rename(columns={'Cuisine': 'Count'})
    .reset_index()
)

# Drop cuisines with fewer than 10 rows, then rank the rest by mean rating.
cuisine_ratings_filtered = (
    cuisine_ratings
    .loc[cuisine_ratings['Count'] >= 10]
    .sort_values(by='Rating', ascending=False)
)

print("\nTop 20 Cuisines by Average Rating (min 10 restaurants):")
print(cuisine_ratings_filtered.head(20))

# Horizontal bar chart of the 20 best-rated cuisines.
plt.figure(figsize=(14, 10))
sns.barplot(
    data=cuisine_ratings_filtered.head(20),
    x='Rating',
    y='Cuisine',
    palette='viridis',
)
plt.title('Top 20 Cuisines by Average Rating', fontsize=16)
plt.xlabel('Average Rating', fontsize=14)
plt.ylabel('Cuisine', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# 2. Identify the most popular cuisines among customers based on the number of votes
# Popularity here is the total vote count per cuisine; no minimum-row filter
# is applied to this ranking.
cuisine_popularity = cuisine_ratings.sort_values('Votes', ascending=False)

print("\nTop 20 Most Popular Cuisines by Total Votes:")
print(cuisine_popularity.head(20))

# Visualize top cuisines by votes
plt.figure(figsize=(14, 10))
sns.barplot(
    x='Votes',
    y='Cuisine',
    data=cuisine_popularity.head(20),
    palette='viridis',
)
plt.title('Top 20 Most Popular Cuisines by Total Votes', fontsize=16)
plt.xlabel('Total Votes', fontsize=14)
plt.ylabel('Cuisine', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# Calculate average votes per restaurant for each cuisine
# (total votes divided by the number of rows counted for that cuisine).
cuisine_ratings['Avg_Votes_Per_Restaurant'] = (
    cuisine_ratings['Votes'] / cuisine_ratings['Count']
)

# Only rank cuisines with at least 10 rows so a single heavily voted
# restaurant cannot dominate the average.
cuisine_avg_votes = cuisine_ratings[cuisine_ratings['Count'] >= 10].sort_values(
    'Avg_Votes_Per_Restaurant', ascending=False
)

print("\nTop 20 Cuisines by Average Votes per Restaurant (min 10 restaurants):")
print(cuisine_avg_votes.head(20))

# Visualize top cuisines by average votes per restaurant
plt.figure(figsize=(14, 10))
sns.barplot(
    x='Avg_Votes_Per_Restaurant',
    y='Cuisine',
    data=cuisine_avg_votes.head(20),
    palette='viridis',
)
plt.title('Top 20 Cuisines by Average Votes per Restaurant', fontsize=16)
plt.xlabel('Average Votes per Restaurant', fontsize=14)
plt.ylabel('Cuisine', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# 3. Determine if there are any specific cuisines that tend to receive higher ratings
# Calculate the percentage of high-rated restaurants (4.0+ stars) for each cuisine
high_rating_threshold = 4.0

# Boolean flag per row: True when the rating reaches the threshold.
cuisine_df['Is_High_Rating'] = cuisine_df['Rating'] >= high_rating_threshold

# Named aggregation yields flat, explicitly named columns, so no positional
# renaming of a MultiIndex header is needed afterwards.
cuisine_high_ratings = cuisine_df.groupby('Cuisine').agg(
    High_Rating_Count=('Is_High_Rating', 'sum'),
    Total_Count=('Is_High_Rating', 'count'),
    Avg_Rating=('Rating', 'mean'),
)

# Share of rows at or above the threshold, expressed as a percentage.
cuisine_high_ratings['High_Rating_Percentage'] = (
    cuisine_high_ratings['High_Rating_Count'] / cuisine_high_ratings['Total_Count']
) * 100
cuisine_high_ratings = cuisine_high_ratings.reset_index()

# Filter cuisines with at least 10 restaurants
cuisine_high_ratings_filtered = cuisine_high_ratings[
    cuisine_high_ratings['Total_Count'] >= 10
].sort_values('High_Rating_Percentage', ascending=False)

print(f"\nTop 20 Cuisines by Percentage of High Ratings ({high_rating_threshold}+ stars, min 10 restaurants):")
print(cuisine_high_ratings_filtered.head(20))

# Visualize top cuisines by percentage of high ratings
plt.figure(figsize=(14, 10))
sns.barplot(
    x='High_Rating_Percentage',
    y='Cuisine',
    data=cuisine_high_ratings_filtered.head(20),
    palette='viridis',
)
plt.title(
    f'Top 20 Cuisines by Percentage of High Ratings ({high_rating_threshold}+ stars)',
    fontsize=16,
)
plt.xlabel('Percentage of High-Rated Restaurants', fontsize=14)
plt.ylabel('Cuisine', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# Create a scatter plot to visualize the relationship between popularity and rating
# One point per cuisine with at least 10 rows; marker size scales with 'Count'.
plt.figure(figsize=(14, 10))
sns.scatterplot(
    x='Votes',
    y='Rating',
    size='Count',
    sizes=(50, 500),
    alpha=0.7,
    data=cuisine_ratings[cuisine_ratings['Count'] >= 10],
)

# Annotate some interesting points: the 10 best-rated cuisines among those
# with at least 50 rows (a stricter cut than the plotted set, so every label
# belongs to a point that is actually drawn).
for _, row in (
    cuisine_ratings[cuisine_ratings['Count'] >= 50]
    .sort_values('Rating', ascending=False)
    .head(10)
    .iterrows()
):
    plt.annotate(
        row['Cuisine'],
        xy=(row['Votes'], row['Rating']),
        xytext=(5, 0),  # nudge the label 5 points to the right of the marker
        textcoords='offset points',
        fontsize=10,
    )

plt.title('Relationship Between Cuisine Popularity and Rating', fontsize=16)
plt.xlabel('Total Votes (Popularity)', fontsize=14)
plt.ylabel('Average Rating', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

You might also like