WITH member_sales_cte AS SELECT s.customer_id, m.join_date, s.order_date, s.product_id,DENSE_RANK OVERPARTITION BY s.customer_id ORDER BY s.order_date AS rankFROM sales AS s JOIN members
Trang 1
ADVANCED SQL CASE STUDIES
Trang 2
SQL CASE STUDY 1
Consider the following schema for this SQL case study
The sales table captures all customer_id level purchases, with the corresponding order_date and product_id information for when and what menu items were ordered.
Trang 3
The menu table maps the product_id to the actual product_name and price of each menu item.
The final members table captures the join_date when a customer_id joined the beta version of the Danny’s Diner loyalty program.
Trang 4
What is the total amount each customer spent at the restaurant?
How many days has each customer visited the restaurant?
What was the first item from the menu purchased by each customer?
What is the most purchased item on the menu and how many times was it purchased by all customers?
Which item was the most popular for each customer?
Which item was purchased first by the customer after they became a member?
Which item was purchased just before the customer became a member?
What is the total items and amount spent for each member before they became a member?
If each $1 spent equates to 10 points and sushi has a 2x points multiplier - how many points would each customer have?
In the first week after a customer joins the program (including their join date) they earn 2x points on all items, not just sushi - how many points do customer A and B have at the end of January?
Join All The Things
Rank All The Things
TRY FOLLOWING QUESTIONS
1.2.3.4.5.6.7.8.9
10
11.12
Trang 5
1 What is the total amount each customer spent at the restaurant?
-- Total amount spent per customer: the menu price of every sale row, summed.
SELECT
    s.customer_id,
    SUM(m.price) AS total_sales
FROM dbo.sales AS s
INNER JOIN dbo.menu AS m
    ON s.product_id = m.product_id
GROUP BY s.customer_id;
2 How many days has each customer visited the restaurant?
-- Number of distinct order dates per customer; several orders on the same
-- date count as a single visit.
SELECT
    customer_id,
    COUNT(DISTINCT order_date) AS visit_count
FROM dbo.sales
GROUP BY customer_id;
3 What was the first item from the menu purchased by each customer?
-- Rank each customer's purchases by order date; rank 1 is the earliest date.
WITH ordered_sales_cte AS (
    SELECT
        s.customer_id,
        s.order_date,
        m.product_name,
        DENSE_RANK() OVER (
            PARTITION BY s.customer_id
            ORDER BY s.order_date
        ) AS rank
    FROM dbo.sales AS s
    INNER JOIN dbo.menu AS m
        -- Join condition and closing parenthesis restored; both were missing
        -- where the text crossed the page break.
        ON s.product_id = m.product_id
)
-- Trang 6 (page break)
-- DENSE_RANK keeps every item bought on the first date; GROUP BY removes
-- duplicate rows when the same item appears more than once on that date.
SELECT
    customer_id,
    product_name
FROM ordered_sales_cte
WHERE rank = 1
GROUP BY customer_id, product_name;
4 What is the most purchased item on the menu and how many times was it purchased by all customers?
-- Most purchased menu item across all customers, with its purchase count.
-- TOP 1 returns a single row; if two items tie for first place, which one is
-- returned is not determined by this ORDER BY.
SELECT TOP 1
    COUNT(s.product_id) AS most_purchased,
    m.product_name
FROM dbo.sales AS s
INNER JOIN dbo.menu AS m
    ON s.product_id = m.product_id
GROUP BY s.product_id, m.product_name
ORDER BY most_purchased DESC;
5 Which item was the most popular for each customer?
-- Count how often each customer ordered each item and rank the items per
-- customer by that count (rank 1 = most ordered; ties share a rank).
WITH fav_item_cte AS (
    SELECT
        s.customer_id,
        m.product_name,
        COUNT(m.product_id) AS order_count,
        DENSE_RANK() OVER (
            PARTITION BY s.customer_id
            -- Rank by the same expression that is reported as order_count.
            ORDER BY COUNT(m.product_id) DESC
        ) AS rank
    FROM dbo.menu AS m
    INNER JOIN dbo.sales AS s
        ON m.product_id = s.product_id
    GROUP BY s.customer_id, m.product_name
)
-- Trang 7 (page break)
SELECT
    customer_id,
    product_name,
    order_count
FROM fav_item_cte
WHERE rank = 1;
6 Which item was purchased first by the customer after they became a member?
-- Purchases made on or after the join date, ranked per customer from the
-- earliest order date (rank 1) onwards.
WITH member_sales_cte AS (
    SELECT
        s.customer_id,
        m.join_date,
        s.order_date,
        s.product_id,
        DENSE_RANK() OVER (
            PARTITION BY s.customer_id
            ORDER BY s.order_date
        ) AS rank
    FROM sales AS s
    INNER JOIN members AS m
        ON s.customer_id = m.customer_id
    WHERE s.order_date >= m.join_date
)
-- Resolve the product name of each customer's first purchase as a member.
SELECT
    s.customer_id,
    s.order_date,
    m2.product_name
FROM member_sales_cte AS s
INNER JOIN menu AS m2
    ON s.product_id = m2.product_id
WHERE s.rank = 1;
Trang 8
7 Which item was purchased just before the customer became a member?
-- Purchases made strictly before the join date, ranked per customer from the
-- most recent order date (rank 1) backwards.
WITH prior_member_purchased_cte AS (
    SELECT
        s.customer_id,
        m.join_date,
        s.order_date,
        s.product_id,
        DENSE_RANK() OVER (
            PARTITION BY s.customer_id
            ORDER BY s.order_date DESC
        ) AS rank
    FROM sales AS s
    INNER JOIN members AS m
        ON s.customer_id = m.customer_id
    WHERE s.order_date < m.join_date
)
-- DENSE_RANK keeps every item bought on that last pre-membership date.
SELECT
    s.customer_id,
    s.order_date,
    m2.product_name
FROM prior_member_purchased_cte AS s
INNER JOIN menu AS m2
    ON s.product_id = m2.product_id
WHERE s.rank = 1;
Trang 9
8 What is the total items and amount spent for each member before they became a member?
-- Per member, for orders placed strictly before the join date: the number of
-- distinct menu items bought and the total amount spent.
-- NOTE(review): COUNT(DISTINCT ...) counts unique products, not purchased
-- units; confirm this is the intended reading of "total items".
SELECT
    s.customer_id,
    COUNT(DISTINCT s.product_id) AS unique_menu_item,
    SUM(mm.price) AS total_sales
FROM sales AS s
INNER JOIN members AS m
    ON s.customer_id = m.customer_id
INNER JOIN menu AS mm
    ON s.product_id = mm.product_id
WHERE s.order_date < m.join_date
GROUP BY s.customer_id;
9 If each $1 spent equates to 10 points and sushi has a 2x points multiplier — how many points would each customer have?
-- Points earned per menu item: 10 points per $1, doubled for product_id 1.
-- The CTE is named price_points_cte so that it matches the name the final
-- SELECT references (it was declared as price_points, an undefined-name error).
WITH price_points_cte AS (
    SELECT
        product_id,
        CASE
            -- presumably product_id 1 is sushi; verify against the menu table
            WHEN product_id = 1 THEN price * 20
            ELSE price * 10
        END AS points
    FROM menu
)
-- Trang 10 (page break)
-- Sum the per-item points over every sale row of each customer.
SELECT
    s.customer_id,
    SUM(p.points) AS total_points
FROM price_points_cte AS p
INNER JOIN sales AS s
    ON p.product_id = s.product_id
GROUP BY s.customer_id;
10 In the first week after a customer joins the program (including their join date) they earn 2x points on all items, not just sushi — how many points do customer A and B have at the end of January?
-- Per member: the last day of the 7-day double-points window (join date plus
-- 6 days, join date included) and the cut-off date, the last day of January 2021.
WITH dates_cte AS (
    SELECT
        m.customer_id,
        m.join_date,
        DATEADD(DAY, 6, m.join_date) AS valid_date,
        EOMONTH('2021-01-31') AS last_date
    FROM members AS m
)
-- Points per customer, order date and item up to the end of January:
-- sushi always earns 2x, every item earns 2x inside the first-week window,
-- everything else earns 10 points per $1.
-- NOTE(review): the GROUP BY is at order/item detail level, so this returns
-- one row per customer, order date and item rather than one total per customer.
SELECT
    d.customer_id,
    s.order_date,
    d.join_date,
    d.valid_date,
    d.last_date,
    m.product_name,
    m.price,
    SUM(CASE
        WHEN m.product_name = 'sushi' THEN 2 * 10 * m.price
        WHEN s.order_date BETWEEN d.join_date AND d.valid_date THEN 2 * 10 * m.price
        ELSE 10 * m.price
    END) AS points
-- Trang 11 (page break)
FROM dates_cte AS d
INNER JOIN sales AS s
    ON d.customer_id = s.customer_id
INNER JOIN menu AS m
    ON s.product_id = m.product_id
-- Inclusive bound: last_date is 31 January itself, so "<" dropped orders
-- placed on the final day of the month.
WHERE s.order_date <= d.last_date
GROUP BY
    d.customer_id,
    s.order_date,
    d.join_date,
    d.valid_date,
    d.last_date,
    m.product_name,
    m.price;
11) Join All The Things
Recreate the table with: customer_id, order_date, product_name, price, member (Y/N)
-- One row per sale with its product name, price and a member flag.
-- member is 'Y' only when the customer has a join_date on or before the order
-- date; earlier orders and customers with no members row (NULL join_date
-- from the LEFT JOIN) are 'N'.
SELECT
    s.customer_id,
    s.order_date,
    m.product_name,
    m.price,
    CASE
        WHEN mm.join_date <= s.order_date THEN 'Y'
        ELSE 'N'
    END AS member
FROM sales AS s
LEFT JOIN menu AS m
    ON s.product_id = m.product_id
LEFT JOIN members AS mm
    ON s.customer_id = mm.customer_id;
Trang 12
12) Rank All The Things
-- Sales joined to menu and members, flagged 'Y' when the order was placed on
-- or after the customer's join date and 'N' otherwise (including customers
-- with no members row).
WITH summary_cte AS (
    SELECT
        s.customer_id,
        s.order_date,
        m.product_name,
        m.price,
        CASE
            WHEN mm.join_date > s.order_date THEN 'N'
            WHEN mm.join_date <= s.order_date THEN 'Y'
            ELSE 'N'
        END AS member
    FROM sales AS s
    LEFT JOIN menu AS m
        ON s.product_id = m.product_id
    LEFT JOIN members AS mm
        ON s.customer_id = mm.customer_id
)
-- Member rows are ranked per customer by order date; non-member rows get NULL.
SELECT
    customer_id,
    order_date,
    product_name,
    price,
    member,
    CASE
        WHEN member = 'N' THEN NULL
        ELSE RANK() OVER (
            PARTITION BY customer_id, member
            ORDER BY order_date
        )
    END AS ranking
FROM summary_cte;
Trang 13
Note that customers can order multiple pizzas in a single order with varying exclusions and extras values even if the pizza is the same type! The exclusions and extras columns will need to be cleaned up before using them in your queries.
Trang 14
There are some known data issues with this table so be careful when using this in your queries - make sure to check the data types for each column in the schema SQL!
Trang 16
Table: pizza_toppings
This table contains all of the topping_name values with their corresponding topping_id value.
Trang 17
Perform Data Cleaning and Transformation
How many pizzas were ordered?
How many unique customer orders were made?
How many successful orders were delivered by each runner?
How many of each type of pizza was delivered?
How many Vegetarian and Meatlovers were ordered by each customer?
What was the maximum number of pizzas delivered in a single order?
For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
How many pizzas were delivered that had both exclusions and extras?
What was the total volume of pizzas ordered for each hour of the day?
What was the volume of orders for each day of the week?
How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
TRY FOLLOWING QUESTIONS
1.2.3.4.5.6.7.8.9.10.11.12.13
Trang 18
TRY FOLLOWING QUESTIONS
14 Is there any relationship between the number of pizzas and how long the order takes to prepare?
15 What was the average distance travelled for each customer?
16 What was the difference between the longest and shortest delivery times for all orders?
17 What was the average speed for each runner for each delivery and do you notice any trend for these values?
18 What is the successful delivery percentage for each runner?
Trang 19
1) Data Cleaning and Transformation
Firstly, to clean up exclusions and extras in the customer_orders table, we create the TEMP TABLE #customer_orders and use CASE WHEN.
-- Copy customer_orders into the temp table #customer_orders, replacing NULL
-- and the literal text 'null' in exclusions and extras with a single space.
SELECT
    order_id,
    customer_id,
    pizza_id,
    CASE
        WHEN exclusions IS NULL OR exclusions LIKE 'null' THEN ' '
        ELSE exclusions
    END AS exclusions,
    CASE
        WHEN extras IS NULL OR extras LIKE 'null' THEN ' '
        ELSE extras
    END AS extras,
    order_time
INTO #customer_orders -- create TEMP TABLE
FROM customer_orders;
Then, we clean the runner_orders table with CASE WHEN and TRIM and create the TEMP TABLE #runner_orders.
In summary:
· pickup_time — Remove nulls and replace with ' '
· distance — Remove 'km' and nulls
· duration — Remove 'minutes' and nulls
· cancellation — Remove NULL and null and replace with ' '
Trang 20
-- Copy runner_orders into the temp table #runner_orders with the text
-- placeholder 'null' blanked out and the unit suffixes stripped.
SELECT
    order_id,
    runner_id,
    CASE
        WHEN pickup_time LIKE 'null' THEN ' '
        ELSE pickup_time
    END AS pickup_time,
    CASE
        WHEN distance LIKE 'null' THEN ' '
        -- TRIM(chars FROM s) strips the listed characters from both ends.
        WHEN distance LIKE '%km' THEN TRIM('km' FROM distance)
        ELSE distance
    END AS distance,
    CASE
        WHEN duration LIKE 'null' THEN ' '
        WHEN duration LIKE '%mins' THEN TRIM('mins' FROM duration)
        WHEN duration LIKE '%minute' THEN TRIM('minute' FROM duration)
        WHEN duration LIKE '%minutes' THEN TRIM('minutes' FROM duration)
        ELSE duration
    END AS duration,
    CASE
        WHEN cancellation IS NULL OR cancellation LIKE 'null' THEN ''
        ELSE cancellation
    END AS cancellation
INTO #runner_orders
FROM runner_orders;
Then, we alter the data according to its correct data type.
· pickup_time to DATETIME type
· distance to FLOAT type
· duration to INT type
Trang 21
-- Convert the cleaned text columns of #runner_orders to their proper types.
-- T-SQL accepts only one ALTER COLUMN per ALTER TABLE statement, so each
-- column is changed in its own statement.
ALTER TABLE #runner_orders
ALTER COLUMN pickup_time DATETIME;

ALTER TABLE #runner_orders
ALTER COLUMN distance FLOAT;

ALTER TABLE #runner_orders
ALTER COLUMN duration INT;
2)How many pizzas were ordered?
-- Every row of #customer_orders is one ordered pizza.
SELECT
    COUNT(*) AS pizza_order_count
FROM #customer_orders;
3) How many unique customer orders were made?
-- Distinct order_id values: an order with several pizzas is counted once.
SELECT
    COUNT(DISTINCT order_id) AS unique_order_count
FROM #customer_orders;
4) How many successful orders were delivered by each runner?
-- Orders per runner, counting only rows whose distance is non-zero.
SELECT
    runner_id,
    COUNT(order_id) AS successful_orders
FROM #runner_orders
WHERE distance <> 0
GROUP BY runner_id;
5) How many of each type of pizza was delivered?
-- Delivered pizzas per pizza type (delivered = runner distance is non-zero).
SELECT
    p.pizza_name,
    COUNT(c.pizza_id) AS delivered_pizza_count
FROM #customer_orders AS c
INNER JOIN #runner_orders AS r
    -- Restored: this ON clause and the pizza_names join were missing where the
    -- text crossed the page break, leaving alias p undefined.
    ON c.order_id = r.order_id
-- Trang 22 (page break)
INNER JOIN pizza_names AS p
    ON c.pizza_id = p.pizza_id
WHERE r.distance != 0
GROUP BY p.pizza_name;
6) How many Vegetarian and Meatlovers were ordered by each customer?
-- Pizzas ordered per customer and pizza type (all orders, delivered or not).
SELECT
    c.customer_id,
    p.pizza_name,
    COUNT(p.pizza_name) AS order_count
FROM #customer_orders AS c
INNER JOIN pizza_names AS p
    ON c.pizza_id = p.pizza_id
GROUP BY c.customer_id, p.pizza_name
ORDER BY c.customer_id;
7) What was the maximum number of pizzas delivered in a single order?
-- Pizzas per order, restricted to orders whose runner distance is non-zero.
WITH pizza_count_cte AS (
    SELECT
        c.order_id,
        COUNT(c.pizza_id) AS pizza_per_order
    FROM #customer_orders AS c
    INNER JOIN #runner_orders AS r
        ON c.order_id = r.order_id
    WHERE r.distance <> 0
    GROUP BY c.order_id
)
-- Largest per-order pizza count.
SELECT
    MAX(pizza_per_order) AS pizza_count
FROM pizza_count_cte;
Trang 23
8) For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
-- Per customer, over pizzas whose runner distance is non-zero: how many had an
-- exclusion or an extra, and how many had neither (a single space marks "none").
SELECT
    c.customer_id,
    SUM(
        CASE
            WHEN c.exclusions <> ' ' OR c.extras <> ' ' THEN 1
            ELSE 0
        END
    ) AS at_least_1_change,
    SUM(
        CASE
            WHEN c.exclusions = ' ' AND c.extras = ' ' THEN 1
            ELSE 0
        END
    ) AS no_change
FROM #customer_orders AS c
INNER JOIN #runner_orders AS r
    ON c.order_id = r.order_id
WHERE r.distance <> 0
GROUP BY c.customer_id
ORDER BY c.customer_id;
9) How many pizzas were delivered that had both exclusions and extras?
-- Delivered pizzas that carry both an exclusion and an extra.
-- The WHERE clause does the filtering (a single space marks "none"); the CASE
-- then counts every remaining row whose two columns are non-NULL.
SELECT
    SUM(
        CASE
            WHEN c.exclusions IS NOT NULL AND c.extras IS NOT NULL THEN 1
            ELSE 0
        END
    ) AS pizza_count_w_exclusions_extras
-- FROM/JOIN restored: the clause was missing where the text crossed the page break.
FROM #customer_orders AS c
INNER JOIN #runner_orders AS r
-- Trang 24 (page break)
    ON c.order_id = r.order_id
-- Delivered filter aligned with the other queries (distance != 0); the
-- original ">= 1" would also drop any delivery shorter than 1 km.
WHERE r.distance != 0
    AND c.exclusions <> ' '
    AND c.extras <> ' ';
10) What was the total volume of pizzas ordered for each hour of the day?
-- Pizzas ordered per hour of the day, taken from order_time.
SELECT
    DATEPART(HOUR, order_time) AS hour_of_day,
    COUNT(order_id) AS pizza_count
FROM #customer_orders
GROUP BY DATEPART(HOUR, order_time);
11) What was the volume of orders for each day of the week?
-- Pizzas ordered per day of the week.
-- FORMAT(..., 'dddd') returns the weekday name of the date it is given, so it
-- needs no "first day of the week" adjustment. The original added 2 days
-- before formatting ("add 2 to adjust 1st day of the week as Monday"), which
-- labelled every order with the weekday two days after its real one; the name
-- is now taken from order_time itself.
SELECT
    FORMAT(order_time, 'dddd') AS day_of_week,
    COUNT(order_id) AS total_pizzas_ordered
FROM #customer_orders
GROUP BY FORMAT(order_time, 'dddd');
12) How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
-- Runner sign-ups per week number of registration_date.
-- NOTE(review): DATEPART(WEEK, ...) buckets by calendar week number, which may
-- not line up with 7-day periods that start on 2021-01-01 — confirm.
SELECT
    DATEPART(WEEK, registration_date) AS registration_week,
    COUNT(runner_id) AS runner_signup
FROM runners
GROUP BY DATEPART(WEEK, registration_date);
Trang 25
13) What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
-- Minutes between order_time and pickup_time for each delivered order.
-- The GROUP BY collapses the one-row-per-pizza detail to one row per order.
WITH time_taken_cte AS (
    SELECT
        c.order_id,
        c.order_time,
        r.pickup_time,
        DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS pickup_minutes
    FROM #customer_orders AS c
    INNER JOIN #runner_orders AS r
        ON c.order_id = r.order_id
    WHERE r.distance != 0
    GROUP BY c.order_id, c.order_time, r.pickup_time
)
-- NOTE(review): this returns one overall average; the question asks for a
-- figure per runner, which would need runner_id in the CTE and a GROUP BY.
SELECT
    AVG(pickup_minutes) AS avg_pickup_minutes
FROM time_taken_cte
WHERE pickup_minutes > 1;
14) Is there any relationship between the number of pizzas and how long the order takes to prepare?
-- Per delivered order: the number of pizzas in it and the minutes between
-- order_time and pickup_time.
WITH prep_time_cte AS (
    SELECT
        c.order_id,
        COUNT(c.order_id) AS pizza_order,
        c.order_time,
        r.pickup_time,
        -- Alias and FROM clause restored; both were missing where the text
        -- crossed the page break. The alias is the name the final SELECT uses.
        DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS prep_time_minutes
    FROM #customer_orders AS c
    -- Trang 26 (page break)
    INNER JOIN #runner_orders AS r
        ON c.order_id = r.order_id
    WHERE r.distance != 0
    GROUP BY c.order_id, c.order_time, r.pickup_time
)
-- Average preparation time for each order size.
SELECT
    pizza_order,
    AVG(prep_time_minutes) AS avg_prep_time_minutes
FROM prep_time_cte
WHERE prep_time_minutes > 1
GROUP BY pizza_order;
15) What was the average distance travelled for each customer?
-- Average runner distance per customer, skipping rows whose duration is zero.
SELECT
    c.customer_id,
    AVG(r.distance) AS avg_distance
FROM #customer_orders AS c
INNER JOIN #runner_orders AS r
    ON c.order_id = r.order_id
WHERE r.duration <> 0
GROUP BY c.customer_id;
16) What was the difference between the longest and shortest delivery times for all orders?
Firstly, let’s see all the durations for the orders.
-- duration is converted to INT after cleaning, so blanked (cancelled) rows
-- hold 0; the filter therefore tests for 0 rather than for a blank string.
SELECT
    order_id,
    duration
FROM #runner_orders
WHERE duration != 0;
Trang 27
Then, we find the difference by deducting the shortest (MIN) from the longest (MAX) delivery times.
-- Longest minus shortest delivery duration.
-- CAST replaces the PostgreSQL-only "::" cast, which T-SQL does not accept.
-- Rows with duration 0 (blanked, cancelled orders) are skipped so they cannot
-- become the minimum.
SELECT
    MAX(CAST(duration AS NUMERIC)) - MIN(CAST(duration AS NUMERIC)) AS delivery_time_difference
FROM #runner_orders
WHERE duration != 0;
17) What was the average speed for each runner for each delivery and do you notice any trend for these values?
-- Per delivered order: pizza count, distance, duration in hours and speed
-- (distance divided by duration in minutes, times 60).
SELECT
    r.runner_id,
    c.customer_id,
    c.order_id,
    COUNT(c.order_id) AS pizza_count,
    r.distance,
    -- 60.0 forces decimal division: duration is converted to INT after
    -- cleaning, and INT / 60 would truncate every sub-hour delivery to 0.
    (r.duration / 60.0) AS duration_hr,
    ROUND((r.distance / r.duration * 60), 2) AS avg_speed
FROM #runner_orders AS r
INNER JOIN #customer_orders AS c
    ON r.order_id = c.order_id
WHERE r.distance != 0
GROUP BY r.runner_id, c.customer_id, c.order_id, r.distance, r.duration
ORDER BY c.order_id;