DATA ANALYSIS PROJECT – SQL
Preview the Sales_Analysis dataset: retrieve all columns of the first five rows in ascending order by sale_date.
-- Preview: all columns of the five earliest rows by sale_date.
-- NOTE(review): there is no tie-breaker after sale_date, so when several rows
-- share a date the choice of the five returned rows may vary between runs.
SELECT TOP (5)
    *
FROM Sales_Analysis
ORDER BY sale_date;
------------------------DATA CLEANING----------------------------
Change the cogs column to DECIMAL(10, 2) so that its values are stored with two decimal places.
-- Redefine cogs as DECIMAL(10, 2): up to 10 digits in total, 2 after the decimal point.
ALTER TABLE Sales_Analysis
    ALTER COLUMN cogs DECIMAL(10, 2);
Change the price_per_unit column to DECIMAL(10, 2) so that its values are stored with two decimal places.
-- Redefine price_per_unit as DECIMAL(10, 2): up to 10 digits in total, 2 after the decimal point.
ALTER TABLE Sales_Analysis
    ALTER COLUMN price_per_unit DECIMAL(10, 2);
Change the total_sale column to DECIMAL(10, 2) so that its values are stored with two decimal places.
-- Redefine total_sale as DECIMAL(10, 2): up to 10 digits in total, 2 after the decimal point.
ALTER TABLE Sales_Analysis
    ALTER COLUMN total_sale DECIMAL(10, 2);
Change the sale_time column to TIME(0) so that its values are stored as hh:mm:ss, without fractional seconds.
-- Redefine sale_time as TIME(0), i.e. a time of day with zero fractional-second digits.
ALTER TABLE Sales_Analysis
    ALTER COLUMN sale_time TIME(0);
Check if there are any NULL values in the dataset.
-- List every row that has a NULL in at least one of the checked columns.
-- The quantity column is referenced by its original misspelled name, quantiy:
-- the sp_rename that corrects the spelling only runs later in this script, so
-- the name quantity does not exist yet when this statement executes.
SELECT *
FROM Sales_Analysis
WHERE transactions_id IS NULL
   OR sale_date IS NULL
   OR sale_time IS NULL
   OR customer_id IS NULL
   OR gender IS NULL
   OR age IS NULL
   OR category IS NULL
   OR quantiy IS NULL
   OR price_per_unit IS NULL
   OR cogs IS NULL
   OR total_sale IS NULL;
Delete rows with NULL values from the dataset.
-- Remove every row that has a NULL in at least one of the checked columns.
-- The quantity column is referenced by its original misspelled name, quantiy:
-- the sp_rename that corrects the spelling only runs later in this script, so
-- the name quantity does not exist yet when this statement executes.
DELETE FROM Sales_Analysis
WHERE transactions_id IS NULL
   OR sale_date IS NULL
   OR sale_time IS NULL
   OR customer_id IS NULL
   OR gender IS NULL
   OR age IS NULL
   OR category IS NULL
   OR quantiy IS NULL
   OR price_per_unit IS NULL
   OR cogs IS NULL
   OR total_sale IS NULL;
Rename the column ‘quantiy’ to ‘quantity’.
-- Correct the misspelled column name quantiy -> quantity on Sales_Analysis.
EXEC sp_rename
    @objname = 'Sales_Analysis.quantiy',
    @newname = 'quantity',
    @objtype = 'COLUMN';
--------------------------DATA ANALYSIS---------------------------
Display the total number of transactions.
-- Number of rows in Sales_Analysis, reported as the total number of transactions.
SELECT
    COUNT(*) AS [Total Transactions]
FROM Sales_Analysis;
Retrieve unique values from the category column.
-- One row per distinct value of category.
SELECT DISTINCT
    category AS [Category]
FROM Sales_Analysis;
Retrieve the ‘Total quantity’ and ‘Total sale’ from the dataset.
-- Overall totals: units sold and sales amount across the whole table.
-- SUM (not COUNT) is used for quantity: COUNT(quantity) would only count the
-- rows with a non-NULL quantity instead of adding up the units.
SELECT
    SUM(quantity)   AS [Total Quantity],
    SUM(total_sale) AS [Total Sale]
FROM Sales_Analysis;
Retrieve ‘Category’, ‘Gender’ and the number of transactions, grouped by ‘Category’
and ‘Gender’, with the largest counts first.
-- Transaction count for each (category, gender) pair, largest count first.
SELECT
    category               AS [Category],
    gender                 AS [Gender],
    COUNT(transactions_id) AS [Transactions]
FROM Sales_Analysis
GROUP BY
    category,
    gender
ORDER BY [Transactions] DESC;
Retrieve ‘Category’, ‘Quantity’ and ‘the sum of total sales’, grouped by ‘Category’.
-- Units sold and sales amount per category, smallest quantity first.
-- SUM (not COUNT) is used for quantity so the column reports units sold rather
-- than the number of rows; the sort uses the same expression.
-- The sales alias is spelled 'Total Sale' (previously 'Totale Sale').
SELECT
    category        AS [Category],
    SUM(quantity)   AS [Quantity],
    SUM(total_sale) AS [Total Sale]
FROM Sales_Analysis
GROUP BY category
ORDER BY SUM(quantity);
Retrieve the ‘Year’ and the count of ‘Transactions’ from the dataset.
-- Transaction count per calendar year of sale_date, oldest year first.
SELECT
    YEAR(sale_date)        AS [Year],
    COUNT(transactions_id) AS [Transactions]
FROM Sales_Analysis
GROUP BY YEAR(sale_date)
ORDER BY [Year];
Retrieve the total number of distinct customers.
-- Number of different customer_id values present in the table.
SELECT
    COUNT(DISTINCT customer_id) AS [Total Customers]
FROM Sales_Analysis;
Retrieve ‘Year’ and the sum of ‘Total sales’, grouped by ‘Year’.
-- Sales amount per calendar year of sale_date, oldest year first.
SELECT
    YEAR(sale_date) AS [Year],
    SUM(total_sale) AS [Total Sales]
FROM Sales_Analysis
GROUP BY YEAR(sale_date)
ORDER BY [Year];
Compare the total sales between 2022 and 2023 for each month.
-- Month-by-month comparison of total sales in 2022 versus 2023.
-- Each CASE passes total_sale through only for its own year, so the two SUMs
-- pivot the years into side-by-side columns; COALESCE turns "no sales in that
-- month for that year" (SUM over only NULLs) into 0.
-- The aliases are written on a single line: a quoted alias that wraps across
-- lines would put a line break inside the column name.
SELECT
    MONTH(sale_date) AS [Month],
    COALESCE(SUM(CASE WHEN YEAR(sale_date) = 2022 THEN total_sale END), 0) AS [Total Sales 2022],
    COALESCE(SUM(CASE WHEN YEAR(sale_date) = 2023 THEN total_sale END), 0) AS [Total Sales 2023]
FROM Sales_Analysis
WHERE YEAR(sale_date) IN (2022, 2023)
GROUP BY MONTH(sale_date)
ORDER BY MONTH(sale_date);
Compare the average sales between 2022 and 2023 for each month.
-- Month-by-month comparison of the average total_sale per row in 2022 versus 2023.
-- Each CASE passes total_sale through only for its own year; the average is
-- cast to DECIMAL(10, 2) and a month with no rows for a year is reported as 0.00.
SELECT
    MONTH(sale_date) AS [Month],
    COALESCE(
        CAST(AVG(CASE WHEN YEAR(sale_date) = 2022 THEN total_sale END) AS DECIMAL(10, 2)),
        0.00
    ) AS [Avg Sales 2022],
    COALESCE(
        CAST(AVG(CASE WHEN YEAR(sale_date) = 2023 THEN total_sale END) AS DECIMAL(10, 2)),
        0.00
    ) AS [Avg Sales 2023]
FROM Sales_Analysis
WHERE YEAR(sale_date) IN (2022, 2023)
GROUP BY MONTH(sale_date)
ORDER BY [Month];
Retrieve ‘Age group’, ‘Quantity’ and ‘Total sales’, grouped by ‘Age group’.
-- Units sold and sales amount per customer age bracket, youngest bracket first.
-- The bracket label is computed once in the derived table so the CASE does not
-- have to be repeated in GROUP BY.
-- Ages below 18 and missing ages get their own labels; previously both fell
-- through to the ELSE branch and were reported as '60+'.
-- Sorting uses the smallest age in each bracket, so brackets appear in numeric
-- order; an 'Unknown' bracket (NULL age) sorts first.
SELECT
    b.age_group       AS [Age group],
    SUM(b.quantity)   AS [Quantity],
    SUM(b.total_sale) AS [Total Sales]
FROM (
    SELECT
        age,
        quantity,
        total_sale,
        CASE
            WHEN age IS NULL            THEN 'Unknown'
            WHEN age < 18               THEN 'Under 18'
            WHEN age BETWEEN 18 AND 23  THEN '18-23'
            WHEN age BETWEEN 24 AND 29  THEN '24-29'
            WHEN age BETWEEN 30 AND 35  THEN '30-35'
            WHEN age BETWEEN 36 AND 41  THEN '36-41'
            WHEN age BETWEEN 42 AND 47  THEN '42-47'
            WHEN age BETWEEN 48 AND 60  THEN '48-60'
            ELSE '60+'                  -- only ages above 60 reach this branch
        END AS age_group
    FROM Sales_Analysis
) AS b
GROUP BY b.age_group
ORDER BY MIN(b.age);
Retrieve the ‘Time period’ and the number of ‘Transactions’, grouped by ‘Time period’.
-- Transaction count per part of the day, busiest period first.
-- The hour of sale_time is extracted once and mapped to a label in the derived
-- table: before 12 -> Morning, 12-17 -> Afternoon, 18-21 -> Evening, and
-- everything else -> Night.
SELECT
    p.time_period           AS [Time Period],
    COUNT(p.transactions_id) AS [Transactions]
FROM (
    SELECT
        transactions_id,
        CASE
            WHEN DATEPART(HOUR, sale_time) < 12              THEN 'Morning'
            WHEN DATEPART(HOUR, sale_time) BETWEEN 12 AND 17 THEN 'Afternoon'
            WHEN DATEPART(HOUR, sale_time) BETWEEN 18 AND 21 THEN 'Evening'
            ELSE 'Night'
        END AS time_period
    FROM Sales_Analysis
) AS p
GROUP BY p.time_period
ORDER BY COUNT(p.transactions_id) DESC;