Setting

Los Angeles

Northridge, CA
contact@adlibhost.com

Project 1: Most Profitable Movies and Most Popular Actors and Directors According to the IMBD Top 1000

–1 count how many genres are there

SELECT COUNT(DISTINCT value) AS unique_genre_count
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)

–2 count how many movies are in each genre without trim

SELECT genre_value, COUNT(*) AS movie_count
FROM (
SELECT value AS genre_value
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genres
GROUP BY genre_value
ORDER BY movie_count DESC;

–3 count how many movies are in each genre (better)

SELECT TRIM(value) AS genre_value, COUNT(*) AS movie_count
FROM (
SELECT LTRIM(RTRIM(value)) AS value
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genres
GROUP BY TRIM(value)
ORDER BY movie_count DESC;

–4 get the minimum rating and maxmum rating (IMDB)

SELECT MIN(IMDB_Rating) AS min_rating, MAX(IMDB_Rating) AS max_rating, AVG(IMDB_Rating) AS average_rating
FROM IMDBTop1000;

–5 min, max, and average year(rounded)

SELECT MIN(Released_Year) AS min_year,
MAX(Released_Year) AS max_year,
ROUND(AVG(Released_Year), 0) AS avg_year
FROM IMDBTop1000;

–6 min, max, and average runtime(rounded) with conversions because runtime is nvarchar

SELECT MIN(CONVERT(INT, LEFT(runtime, CHARINDEX(‘ ‘, runtime) – 1))) AS min_runtime,
MAX(CONVERT(INT, LEFT(runtime, CHARINDEX(‘ ‘, runtime) – 1))) AS max_runtime,
AVG(CONVERT(FLOAT, LEFT(runtime, CHARINDEX(‘ ‘, runtime) – 1))) AS avg_runtime
FROM IMDBTop1000;

–7 min, max, and average meta_score

SELECT
MIN(Meta_score) AS Min_Meta_score,
MAX(Meta_score) AS Max_Meta_score,
AVG(CAST(Meta_score AS FLOAT)) AS Avg_Meta_score
FROM IMDBTop1000;

–8 count how many times each director appeared on this table

SELECT Director, COUNT(*) AS DirectorCount
FROM IMDBTop1000
GROUP BY Director
ORDER BY DirectorCount DESC;

–9 count how many times each star appeared on this table from all 4 columns ‘star1’, ‘star2’, ‘star3’, ‘star4’

SELECT Star1, COUNT(*) AS StarCount
FROM (
SELECT Star1 FROM IMDBTop1000
UNION ALL
SELECT Star2 FROM IMDBTop1000
UNION ALL
SELECT Star3 FROM IMDBTop1000
UNION ALL
SELECT Star4 FROM IMDBTop1000
) AS AllStars
GROUP BY Star1
ORDER BY StarCount DESC;

–INSIGHTS
— 10 average rating of each genre

SELECT
TRIM(value) AS genre_value,
COUNT(*) AS movie_count,
AVG(IMDB_Rating) AS average_rating
FROM (
SELECT LTRIM(RTRIM(value)) AS value, IMDB_Rating
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genres
GROUP BY TRIM(value)
ORDER BY average_rating DESC;

–11 min, max, and average number of votes

SELECT
MIN(No_of_Votes) AS MinVotes,
MAX(No_of_Votes) AS MaxVotes,
AVG(No_of_Votes) AS AvgVotes
FROM IMDBTop1000;

— 12 Vote number range and average rating

SELECT
CASE
WHEN No_of_Votes >= 25088 AND No_of_Votes < 273693 THEN ‘25088-273693’
WHEN No_of_Votes >= 273693 AND No_of_Votes <= 2343110 THEN ‘273693-2343110’
ELSE ‘Other’
END AS VoteRange,
AVG(IMDB_Rating) AS AverageRating
FROM IMDBTop1000
GROUP BY
CASE
WHEN No_of_Votes >= 25088 AND No_of_Votes < 273693 THEN ‘25088-273693’
WHEN No_of_Votes >= 273693 AND No_of_Votes <= 2343110 THEN ‘273693-2343110’
ELSE ‘Other’
END;

–13 voter range split into 10

SELECT
CASE
WHEN No_of_Votes >= 25088 AND No_of_Votes < 256890 THEN ‘25088-256890’
WHEN No_of_Votes >= 256890 AND No_of_Votes < 488692 THEN ‘256890-488692’
WHEN No_of_Votes >= 488692 AND No_of_Votes < 720495 THEN ‘488692-720495’
WHEN No_of_Votes >= 720495 AND No_of_Votes < 952296 THEN ‘720495-952296’
WHEN No_of_Votes >= 952296 AND No_of_Votes < 1180099 THEN ‘952296-1180099’
WHEN No_of_Votes >= 1180099 AND No_of_Votes < 1407901 THEN ‘1180099-1407901’
WHEN No_of_Votes >= 1407901 AND No_of_Votes < 1635703 THEN ‘1407901-1635703’
WHEN No_of_Votes >= 1635703 AND No_of_Votes < 1863506 THEN ‘1635703-1863506’
WHEN No_of_Votes >= 1863506 AND No_of_Votes < 2091308 THEN ‘1863506-2091308’
WHEN No_of_Votes >= 2091308 AND No_of_Votes <= 2343110 THEN ‘2091308-2343110’
ELSE ‘Other’
END AS VoteRange,
AVG(IMDB_Rating) AS AverageRating
FROM IMDBTop1000
GROUP BY
CASE
WHEN No_of_Votes >= 25088 AND No_of_Votes < 256890 THEN ‘25088-256890’
WHEN No_of_Votes >= 256890 AND No_of_Votes < 488692 THEN ‘256890-488692’
WHEN No_of_Votes >= 488692 AND No_of_Votes < 720495 THEN ‘488692-720495’
WHEN No_of_Votes >= 720495 AND No_of_Votes < 952296 THEN ‘720495-952296’
WHEN No_of_Votes >= 952296 AND No_of_Votes < 1180099 THEN ‘952296-1180099’
WHEN No_of_Votes >= 1180099 AND No_of_Votes < 1407901 THEN ‘1180099-1407901’
WHEN No_of_Votes >= 1407901 AND No_of_Votes < 1635703 THEN ‘1407901-1635703’
WHEN No_of_Votes >= 1635703 AND No_of_Votes < 1863506 THEN ‘1635703-1863506’
WHEN No_of_Votes >= 1863506 AND No_of_Votes < 2091308 THEN ‘1863506-2091308’
WHEN No_of_Votes >= 2091308 AND No_of_Votes <= 2343110 THEN ‘2091308-2343110’
ELSE ‘Other’
END
ORDER BY VoteRange;

–14 runtime ranges and the average rating of each split into 3

SELECT
CASE
WHEN runtime_int >= 45 AND runtime_int <= 137 THEN ’45-137′
WHEN runtime_int > 137 AND runtime_int <= 229 THEN ‘138-229’
WHEN runtime_int > 229 AND runtime_int <= 321 THEN ‘230-321’
ELSE ‘Other’
END AS RuntimeRange,
AVG(IMDB_Rating) AS AverageRating
FROM (
SELECT
CAST(SUBSTRING(Runtime, 1, PATINDEX(‘%[^0-9]%’, Runtime) – 1) AS INT) AS runtime_int,
IMDB_Rating
FROM IMDBTop1000
) AS RuntimeData
GROUP BY
CASE
WHEN runtime_int >= 45 AND runtime_int <= 137 THEN ’45-137′
WHEN runtime_int > 137 AND runtime_int <= 229 THEN ‘138-229’
WHEN runtime_int > 229 AND runtime_int <= 321 THEN ‘230-321’
ELSE ‘Other’
END
ORDER BY AverageRating;

–15 top rated movie in each genre

WITH TopRatedMovies AS (
SELECT
genre_value,
MAX(title) AS top_rated_movie,
MAX(IMDB_Rating) AS max_rating
FROM (
SELECT TRIM(value) AS genre_value,
title,
IMDB_Rating,
ROW_NUMBER() OVER (PARTITION BY TRIM(value) ORDER BY IMDB_Rating DESC) AS rn
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genre_movies
WHERE rn = 1
GROUP BY genre_value
),
TopRatedMoviesWithMetaScore AS (
SELECT
TRM.genre_value AS genre,
TRM.top_rated_movie,
TRM.max_rating AS max_rating,
IMDB.Meta_score AS meta_score
FROM TopRatedMovies AS TRM
LEFT JOIN IMDBTop1000 AS IMDB ON TRM.top_rated_movie = IMDB.title
)
SELECT
genre,
top_rated_movie,
max_rating,
meta_score
FROM TopRatedMoviesWithMetaScore
ORDER BY max_rating DESC;

–16 Top IMDB rating for each genre with Metascore

WITH TopMetaScoreMovies AS (
SELECT
genre_value,
MAX(title) AS top_meta_score_movie,
MAX(Meta_score) AS max_meta_score
FROM (
SELECT TRIM(value) AS genre_value,
title,
Meta_score,
ROW_NUMBER() OVER (PARTITION BY TRIM(value) ORDER BY Meta_score DESC) AS rn
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genre_movies
WHERE rn = 1
GROUP BY genre_value
),
TopMetaScoreMoviesWithIMDBRating AS (
SELECT
TMM.genre_value AS genre,
TMM.top_meta_score_movie,
TMM.max_meta_score AS max_meta_score,
IMDB.IMDB_Rating AS IMDB_Rating
FROM TopMetaScoreMovies AS TMM
LEFT JOIN IMDBTop1000 AS IMDB ON TMM.top_meta_score_movie = IMDB.title
)
SELECT
genre,
top_meta_score_movie,
max_meta_score,
IMDB_Rating
FROM TopMetaScoreMoviesWithIMDBRating
ORDER BY IMDB_Rating DESC;

–17 Top Metascore for each genre with IMDB rating

WITH TopMetaScoreMovies AS (
SELECT
genre_value,
MAX(title) AS top_meta_score_movie,
MAX(Meta_score) AS max_meta_score
FROM (
SELECT TRIM(value) AS genre_value,
title,
Meta_score,
ROW_NUMBER() OVER (PARTITION BY TRIM(value) ORDER BY Meta_score DESC) AS rn
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genre_movies
WHERE rn = 1
GROUP BY genre_value
),
TopMetaScoreMoviesWithIMDBRating AS (
SELECT
TMM.genre_value AS genre,
TMM.top_meta_score_movie,
TMM.max_meta_score AS max_meta_score,
IMDB.IMDB_Rating AS IMDB_Rating
FROM TopMetaScoreMovies AS TMM
LEFT JOIN IMDBTop1000 AS IMDB ON TMM.top_meta_score_movie = IMDB.title
)
SELECT
genre,
top_meta_score_movie,
max_meta_score,
IMDB_Rating
FROM TopMetaScoreMoviesWithIMDBRating
ORDER BY max_meta_score DESC;

–18 Top Rated and top Meta Score for each genre side by side

WITH TopRatedMovies AS (
SELECT
genre_value,
MAX(title) AS top_rated_movie,
MAX(IMDB_Rating) AS max_rating
FROM (
SELECT TRIM(value) AS genre_value,
title,
IMDB_Rating,
ROW_NUMBER() OVER (PARTITION BY TRIM(value) ORDER BY IMDB_Rating DESC) AS rn
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genre_movies
WHERE rn = 1
GROUP BY genre_value
),
TopMetaScoreMovies AS (
SELECT
genre_value,
MAX(title) AS top_meta_score_movie,
MAX(Meta_score) AS max_meta_score
FROM (
SELECT TRIM(value) AS genre_value,
title,
Meta_score,
ROW_NUMBER() OVER (PARTITION BY TRIM(value) ORDER BY Meta_score DESC) AS rn
FROM IMDBTop1000
CROSS APPLY STRING_SPLIT(genre, ‘,’)
) AS genre_movies
WHERE rn = 1
GROUP BY genre_value
)
SELECT
TRM.genre_value AS genre,
TRM.top_rated_movie,
TRM.max_rating,
TMM.top_meta_score_movie,
TMM.max_meta_score
FROM TopRatedMovies AS TRM
INNER JOIN TopMetaScoreMovies AS TMM ON TRM.genre_value = TMM.genre_value
ORDER BY TRM.genre_value;

–19 relationship of IMDB Top Rated (1000) to 500 most profitable movies. Which IMDB top movies are in the 500 most profitable list

SELECT t1.title,
t1.Released_Year,
t1.Certificate,
t1.Runtime,
t1.Genre,
t1.IMDB_Rating,
t1.Meta_score,
t1.Director,
t1.Star1,
t1.Star2,
t1.Star3,
t1.Star4,
t1.No_of_Votes,
t2.WorldwideGrossM,
t2.BudgetRecoveredPercent,
t2.XTimesBudgetRecovered,
t2.BudgetMillions,
t2.DomesticGrossM,
t2.DomesticPercent,
t2.InternationalGrossM,
t2.PercentGrossFromInternational,
t2.WorldwideGross,
t2.Year,
t2.Decade,
t2.Source,
t2.Horror

FROM IMDBTop1000 AS t1
INNER JOIN MostProfitable500 AS t2 ON t1.title = t2.title

ORDER BY IMDB_Rating DESC

 

SELECT t1.title,
t1.Released_Year,
t1.Certificate,
t1.Runtime,
t1.Genre,
t1.IMDB_Rating,
t1.Meta_score,
t1.Director,
t1.Star1,
t1.Star2,
t1.Star3,
t1.Star4,
t1.No_of_Votes,
t2.WorldwideGrossM,
t2.BudgetRecoveredPercent,
t2.XTimesBudgetRecovered,
t2.BudgetMillions,
t2.DomesticGrossM,
t2.DomesticPercent,
t2.InternationalGrossM,
t2.PercentGrossFromInternational,
t2.WorldwideGross,
t2.Year,
t2.Decade,
t2.Source,
t2.Horror

FROM IMDBTop1000 AS t1
INNER JOIN MostProfitable500 AS t2 ON t1.title = t2.title

ORDER BY Meta_score DESC

–Top 20 movies in terms of Worldwide Gross. Times bidget recovered and Budget(millions) ranks are also displayed

WITH Top20 AS (
SELECT *,
RANK() OVER (ORDER BY worldwidegross DESC) AS worldwidegross_rank,
RANK() OVER (ORDER BY xtimesbudgetrecovered DESC) AS xtimesbudgetrecovered_rank,
RANK() OVER (ORDER BY budgetmillions DESC) AS budgetmillions_rank
FROM MostProfitable500
)
SELECT
title, WorldwideGross, XTimesBudgetRecovered, BudgetMillions, worldwidegross_rank, xtimesbudgetrecovered_rank, budgetmillions_rank
FROM Top20
WHERE worldwidegross_rank <= 20
ORDER BY worldwidegross_rank;

Project 2: E-Commerce Sales Analysis of Clothing Sold Through Amazon India

— Alter the table to change the data type of the ‘Amount’ column to DECIMAL(10, 2)
ALTER TABLE AmazonSaleReport
ALTER COLUMN Amount DECIMAL(10, 2);

— Calculate the sum of the ‘Amount’ values, treating NULL as 0
SELECT SUM(ISNULL(Amount, 0)) AS TotalSum
FROM AmazonSaleReport;

— Total Amount for each date ordered by date
SELECT
Date,
SUM(ISNULL(Amount, 0)) AS TotalAmount
FROM
AmazonSaleReport
GROUP BY
Date
ORDER BY
Date;

— Total Amount for each date ordered by TotalAmount
SELECT
Date,
SUM(ISNULL(Amount, 0)) AS TotalAmount
FROM
AmazonSaleReport
GROUP BY
Date
ORDER BY
TotalAmount desc;

–Top 10 product by sales with filtering for shipped orders only
SELECT TOP 10
SKU,
Category,
Size,
SUM(Amount) AS TotalSales
FROM
AmazonSaleReport
WHERE
Status = ‘Shipped – Delivered to Buyer’ — Filter for shipped orders only
GROUP BY
SKU, Category, Size
ORDER BY
TotalSales DESC;

–Average order value
SELECT
AVG(Amount) AS AverageOrderValue
FROM
AmazonSaleReport
WHERE
Status = ‘Shipped – Delivered to Buyer’; — Filter for shipped orders only

–Weekly total revenue ordered by WeeklyTotalAmount
SELECT
DATEPART(WEEK, Date) AS WeekNumber,
MIN(Date) AS WeekStartDate,
MAX(Date) AS WeekEndDate,
SUM(ISNULL(Amount, 0)) AS WeeklyTotalAmount
FROM
AmazonSaleReport
GROUP BY
DATEPART(WEEK, Date)
ORDER BY
WeeklyTotalAmount desc;

–Monthly total revenue ordered by MonthlyTotalAmount
SELECT
DATEPART(YEAR, Date) AS Year,
DATEPART(MONTH, Date) AS Month,
MIN(Date) AS MonthStartDate,
MAX(Date) AS MonthEndDate,
SUM(ISNULL(Amount, 0)) AS MonthlyTotalAmount
FROM
AmazonSaleReport
GROUP BY
DATEPART(YEAR, Date),
DATEPART(MONTH, Date)
ORDER BY
MonthlyTotalAmount;

–Top cities in terms of TotalSales
SELECT
[ship-city] AS City,
SUM(ISNULL(Amount, 0)) AS TotalSales
FROM
AmazonSaleReport
WHERE
Status = ‘Shipped – Delivered to Buyer’ — Filter for shipped orders only
GROUP BY
[ship-city]
ORDER BY
TotalSales DESC;

–Top states in terms of TotalSales
SELECT [ship-state], SUM(Amount) AS TotalSales
FROM AmazonSaleReport
GROUP BY [ship-state]
ORDER BY TotalSales DESC;

–Trends in order cancellations per month
SELECT
DATEPART(YEAR, Date) AS Year,
DATEPART(MONTH, Date) AS Month,
COUNT(*) AS CancellationCount
FROM
AmazonSaleReport
WHERE
Status = ‘Cancelled’
GROUP BY
DATEPART(YEAR, Date),
DATEPART(MONTH, Date)
ORDER BY
Year, Month;

— Calculate the distribution of B2B vs. B2C customers
SELECT
CASE
WHEN B2B = 1 THEN ‘B2B’
ELSE ‘B2C’
END AS CustomerType,
COUNT(*) AS CustomerCount
FROM
AmazonSaleReport
GROUP BY
CASE
WHEN B2B = 1 THEN ‘B2B’
ELSE ‘B2C’
END;

— Percentage of order fulfilled by Amazon vs merchant
SELECT
fulfilment,
COUNT(*) AS TotalOrders,
(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER ()) AS Percentage
FROM AmazonSaleReport
GROUP BY fulfilment;

–Most popular categories
SELECT Category, COUNT(*) AS CategoryCount
FROM AmazonSaleReport
GROUP BY Category
ORDER BY CategoryCount DESC;

–Size distribution for each category
SELECT Category, Size, COUNT(*) AS SizeCount
FROM AmazonSaleReport
GROUP BY Category, Size
ORDER BY Category, SizeCount DESC;

–Which size for each category has the most orders
SELECT a.Category, a.Size, b.MaxSizeCount
FROM (
SELECT Category, Size, COUNT(*) AS SizeCount
FROM AmazonSaleReport
GROUP BY Category, Size
) a
INNER JOIN (
SELECT Category, MAX(SizeCount) AS MaxSizeCount
FROM (
SELECT Category, Size, COUNT(*) AS SizeCount
FROM AmazonSaleReport
GROUP BY Category, Size
) subquery
GROUP BY Category
) b ON a.Category = b.Category AND a.SizeCount = b.MaxSizeCount
ORDER BY a.Category;

–Top 3 most expensive
SELECT TOP 3 SKU, Category, Amount
FROM AmazonSaleReport
ORDER BY Amount DESC;

–Top 3 least expensive products
SELECT TOP 3 SKU, Category, Amount
FROM AmazonSaleReport
WHERE Amount IS NOT NULL AND Amount > 0
ORDER BY Amount ASC;

–Top 3 most popular promotion-ids
SELECT TOP 3 [promotion-ids] AS MostCommonPromotionID, COUNT(*) AS Frequency
FROM AmazonSaleReport
WHERE [promotion-ids] IS NOT NULL
GROUP BY [promotion-ids]
ORDER BY Frequency DESC;

–Top 3 most popular SKU
SELECT TOP 3 SKU, COUNT(*) AS SKU_Count
FROM AmazonSaleReport
GROUP BY SKU
ORDER BY SKU_Count DESC;

–Promotion count for the top purchased SKU (consolidated Free-Financing since there are many types)
WITH ProductPromotionCounts AS (
SELECT SKU, [promotion-ids], COUNT(*) AS PromotionCount
FROM AmazonSaleReport
WHERE SKU = (
SELECT TOP 1 SKU
FROM AmazonSaleReport
GROUP BY SKU
ORDER BY COUNT(*) DESC
)
GROUP BY SKU, [promotion-ids]
)

SELECT SKU,
CASE
WHEN [promotion-ids] LIKE ‘%Free-Financing%’ THEN ‘Free-Financing’
ELSE [promotion-ids]
END AS ConsolidatedPromotion,
SUM(PromotionCount) AS TotalPromotionCount
FROM ProductPromotionCounts
GROUP BY SKU,
CASE
WHEN [promotion-ids] LIKE ‘%Free-Financing%’ THEN ‘Free-Financing’
ELSE [promotion-ids]
END
ORDER BY TotalPromotionCount DESC;

–top ship-postal-code and what they order most

SELECT
[ship-postal-code] AS PostalCode,
MAX(Category) AS MostOrderedCategory,
COUNT(*) AS OrderCount
FROM AmazonSaleReport
GROUP BY [ship-postal-code]
ORDER BY OrderCount DESC;

–Top ship-city and what they order most

SELECT
[ship-city] AS City,
MAX(Category) AS MostOrderedCategory,
COUNT(*) AS OrderCount
FROM AmazonSaleReport
GROUP BY [ship-city]
ORDER BY OrderCount DESC;

–Top ship-state and what they order most

SELECT
[ship-state] AS State,
MAX(Category) AS MostOrderedCategory,
COUNT(*) AS OrderCount
FROM AmazonSaleReport
GROUP BY [ship-state]
ORDER BY OrderCount DESC;

Project 3: Starbucks Location, Distance and Hours Analysis

ALTER TABLE StarbucksInCalifornia
ADD Latitude FLOAT, Longitude FLOAT;

UPDATE StarbucksInCalifornia
SET Latitude = CAST(SUBSTRING(Coordinates, 1, CHARINDEX(‘,’, Coordinates) – 1) AS FLOAT),
Longitude = CAST(SUBSTRING(Coordinates, CHARINDEX(‘,’, Coordinates) + 1, LEN(Coordinates)) AS FLOAT);

ALTER TABLE StarbucksInCalifornia
DROP COLUMN Coordinates;

–change all names with spaces to PascalCase
EXEC sp_rename ‘StarbucksInCalifornia.ID’, ‘Id’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.Name’, ‘Name’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Ownership Type]’, ‘OwnershipType’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Phone Number]’, ‘PhoneNumber’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.Address’, ‘Address’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.City’, ‘City’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.County’, ‘County’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.Zip’, ‘Zip’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.State’, ‘State’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.Image’, ‘Image’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Regular hours]’, ‘RegularHours’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Saturday Openint TImes]’, ‘SaturdayOpeningTimes’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Sunday Opening Times]’, ‘SundayOpeningTimes’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[24-Hour Service]’, ‘TwentyFourHourService’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Starbucks Reserve-Clover Brewed]’, ‘StarbucksReserveCloverBrewed’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Oven-Warmed Food]’, ‘OvenWarmedFood’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Free Wi-Fi]’, ‘FreeWiFi’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Verismo System]’, ‘VerismoSystem’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Mobile Payment]’, ‘MobilePayment’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Digital Rewards]’, ‘DigitalRewards’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[La Boulange]’, ‘LaBoulange’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Fizzio Handcrafted Sodas]’, ‘FizzioHandcraftedSodas’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.[Drive-Thru]’, ‘DriveThru’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.Link’, ‘Link’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.Latitude’, ‘Latitude’, ‘COLUMN’;
EXEC sp_rename ‘StarbucksInCalifornia.Longitude’, ‘Longitude’, ‘COLUMN’;

 

–Starbucks that are closest to each other
WITH CoordinateDistances AS (
SELECT
A.ID AS A_ID,
A.Name AS A_Name,
A.Latitude AS A_Latitude,
A.Longitude AS A_Longitude,
B.ID AS B_ID,
B.Name AS B_Name,
B.Latitude AS B_Latitude,
B.Longitude AS B_Longitude,
3959 * ACOS(SIN(RADIANS(A.Latitude)) * SIN(RADIANS(B.Latitude)) + COS(RADIANS(A.Latitude)) * COS(RADIANS(B.Latitude)) * COS(RADIANS(B.Longitude) – RADIANS(A.Longitude)) ) AS Distance
FROM StarbucksInCalifornia A
JOIN StarbucksInCalifornia B
ON A.ID <> B.ID
)
SELECT TOP 2
A_ID,
A_Name,
A_Latitude,
A_Longitude,
B_ID,
B_Name,
B_Latitude,
B_Longitude,
Distance
FROM CoordinateDistances
ORDER BY Distance;

–Starbucks that are furthest from each other
WITH CoordinateDistances AS (
SELECT
A.ID AS A_ID,
A.Name AS A_Name,
A.Latitude AS A_Latitude,
A.Longitude AS A_Longitude,
B.ID AS B_ID,
B.Name AS B_Name,
B.Latitude AS B_Latitude,
B.Longitude AS B_Longitude,
3959 * ACOS(SIN(RADIANS(A.Latitude)) * SIN(RADIANS(B.Latitude)) + COS(RADIANS(A.Latitude)) * COS(RADIANS(B.Latitude)) * COS(RADIANS(B.Longitude) – RADIANS(A.Longitude)) ) AS Distance
FROM StarbucksInCalifornia A
JOIN StarbucksInCalifornia B
ON A.ID <> B.ID
)
SELECT TOP 2
A_ID,
A_Name,
A_Latitude,
A_Longitude,
B_ID,
B_Name,
B_Latitude,
B_Longitude,
Distance
FROM CoordinateDistances
ORDER BY Distance DESC;

 

 

–Occurrences of regular hours
SELECT RegularHours, COUNT(*) AS Occurrences
FROM StarbucksInCalifornia
GROUP BY RegularHours
ORDER BY Occurrences DESC;

–regular hours open duration (ordered by OpenDuration)
WITH HourDifferences AS (
SELECT
Id,
Name AS StoreName,
Address AS StoreAddress,
Latitude,
Longitude,
RegularHours,
CASE
WHEN RegularHours LIKE ’12:00 AM to 12:00 AM%’ THEN ‘24.0’
ELSE
IIF(
TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME) IS NOT NULL AND
TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME) IS NOT NULL,
IIF(
TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME) < TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME),
CAST(
DATEDIFF(MINUTE, TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME), TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME)) / 60.0
AS DECIMAL(10, 1)
),
CAST(
24.0 – DATEDIFF(MINUTE, TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME), TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME)) / 60.0
AS DECIMAL(10, 1)
)
),
NULL
)
END AS OpenDuration
FROM StarbucksInCalifornia
)
SELECT
Id,
StoreName,
StoreAddress,
Latitude,
Longitude,
RegularHours,
OpenDuration
FROM HourDifferences
ORDER BY OpenDuration DESC;

–Occurrences of each duration
WITH HourDifferences AS (
SELECT
Name AS StoreName,
Address AS StoreAddress,
RegularHours,
CASE
WHEN RegularHours LIKE ’12:00 AM to 12:00 AM%’ THEN ‘24.0’
ELSE
IIF(
TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME) IS NOT NULL AND
TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME) IS NOT NULL,
IIF(
TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME) < TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME),
CAST(
DATEDIFF(MINUTE, TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME), TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME)) / 60.0
AS DECIMAL(10, 1)
),
CAST(
24.0 – DATEDIFF(MINUTE, TRY_CAST(SUBSTRING(RegularHours, 12, 8) AS TIME), TRY_CAST(SUBSTRING(RegularHours, 1, 8) AS TIME)) / 60.0
AS DECIMAL(10, 1)
)
),
NULL
)
END AS OpenDuration
FROM StarbucksInCalifornia
)

SELECT
OpenDuration,
COUNT(*) AS Occurrences
FROM HourDifferences
GROUP BY OpenDuration
ORDER BY Occurrences DESC;

–Total Population represented in the StarbucksWorld table

SELECT SUM(Population) AS TotalPopulation
FROM StarbucksWorld;

–Countries that have Single Digit Number of Starbucks

SELECT *
FROM StarbucksWorld
WHERE LEN([Number of Starbucks]) = 1;

–Top 3 Counties in California in terms of number of Starbucks (and then the top 3 Cities within them also in terms of number of Starbucks)

WITH CountyRanking AS (
SELECT
County,
[Number of Starbucks],
DENSE_RANK() OVER (ORDER BY [Number of Starbucks] DESC) AS CountyRank
FROM StarbucksCaliforniaCounties
),
CityRanking AS (
SELECT
City,
County,
[Number of Starbucks],
ROW_NUMBER() OVER (PARTITION BY County ORDER BY [Number of Starbucks] DESC) AS CityRank
FROM StarbucksCaliforniaCities
)
SELECT
c.County,
c.[Number of Starbucks] AS CountyStarbucks,
ci.City,
ci.[Number of Starbucks] AS CityStarbucks
FROM
CountyRanking c
JOIN
CityRanking ci ON c.County = ci.County AND c.CountyRank <= 3 AND ci.CityRank <= 3
ORDER BY
c.CountyRank, c.County, ci.CityRank;