Intermediate SQL

Master intermediate SQL with techniques on filtering, joins, subqueries, aggregations, and more. Enhance your data querying skills!

Saartje Ly

Data Engineering Intern

INTRO

Let's say we have a table called Employees, and a table called Departments.



ADVANCED FILTERING

IN Operator matches any value within a specified list. SELECT * FROM Employees WHERE Department IN ('Sales', 'Marketing');

BETWEEN Operator filters data within a range. SELECT * FROM Employees WHERE Salary BETWEEN 50000 AND 70000;

LIKE Operator performs pattern matching with wildcards. SELECT * FROM Employees WHERE Name LIKE 'J%'; -- Names starting with 'J'.


SUBQUERIES

Single-row Subqueries return a single value. SELECT Name FROM Employees WHERE Salary = (SELECT MAX(Salary) FROM Employees);

Multiple-row Subqueries return multiple rows. SELECT Name FROM Employees WHERE Department IN (SELECT Department FROM Departments WHERE Location = 'New York');

Correlated Subqueries reference columns from the outer query. SELECT Name FROM Employees e1 WHERE Salary > (SELECT AVG(Salary) FROM Employees e2 WHERE e1.Department = e2.Department);


JOINS

INNER JOIN returns only matching rows from both tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
INNER JOIN Departments d ON e.DepartmentID = d.DepartmentID;

LEFT JOIN (or LEFT OUTER JOIN) returns all rows from the left table, and matched rows from the right table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
LEFT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

RIGHT JOIN (or RIGHT OUTER JOIN) returns all rows from the right table, and matched rows from the left table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
RIGHT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

FULL JOIN (or FULL OUTER JOIN) returns rows when there is a match in one of the tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
FULL JOIN Departments d ON e.DepartmentID = d.DepartmentID;


GROUPING AND AGGREGATIONS

GROUP BY groups rows sharing the same value in specified columns into summary rows.

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department;

HAVING filters groups based on a condition (used with GROUP BY)

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department 
HAVING AVG(Salary) > 60000;


WINDOW FUNCTIONS

ROW_NUMBER() assigns a unique number to each row based on a specified order.

SELECT Name, Salary, ROW_NUMBER() OVER (ORDER BY Salary DESC) AS RowNum
FROM Employees;

RANK() assigns a rank to each row within the partition of a result set.

SELECT Name, Salary, RANK() OVER (PARTITION BY Department ORDER BY Salary DESC) AS Rank
FROM Employees;

SUM() OVER() calculates a running total or cumulative sum.

SELECT Name, Salary, SUM(Salary) OVER (ORDER BY Salary) AS CumulativeSalary
FROM Employees;


COMMON TABLE EXPRESSIONS

WITH Clause: Defines a temporary result set that you can reference within a SELECT, INSERT, UPDATE, or DELETE statement.

WITH DepartmentSalaries AS (
    SELECT Department, AVG(Salary) AS AvgSalary
    FROM Employees
    GROUP BY Department
)
SELECT Department, AvgSalary
FROM DepartmentSalaries
WHERE AvgSalary > 60000;


SET OPERATIONS

UNION combines results from multiple SELECT statements, removing duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION
SELECT Name FROM Employees WHERE Department = 'Marketing';

UNION ALL combines results from multiple SELECT statements, including duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION ALL
SELECT Name FROM Employees WHERE Department = 'Marketing';

INTERSECT returns common rows from multiple SELECT statements.

SELECT Name FROM Employees WHERE Department = 'Sales'
INTERSECT
SELECT Name FROM Employees WHERE Salary > 50000;

EXCEPT returns rows from the first SELECT statement that are not in the second SELECT statement.

SELECT Name FROM Employees WHERE Department = 'Sales'
EXCEPT
SELECT Name FROM Employees WHERE Salary < 40000;

INTRO

Let's say we have a table called Employees, and a table called Departments.



ADVANCED FILTERING

IN Operator matches any value within a specified list. SELECT * FROM Employees WHERE Department IN ('Sales', 'Marketing');

BETWEEN Operator filters data within a range. SELECT * FROM Employees WHERE Salary BETWEEN 50000 AND 70000;

LIKE Operator performs pattern matching with wildcards. SELECT * FROM Employees WHERE Name LIKE 'J%'; -- Names starting with 'J'.


SUBQUERIES

Single-row Subqueries return a single value. SELECT Name FROM Employees WHERE Salary = (SELECT MAX(Salary) FROM Employees);

Multiple-row Subqueries return multiple rows. SELECT Name FROM Employees WHERE Department IN (SELECT Department FROM Departments WHERE Location = 'New York');

Correlated Subqueries reference columns from the outer query. SELECT Name FROM Employees e1 WHERE Salary > (SELECT AVG(Salary) FROM Employees e2 WHERE e1.Department = e2.Department);


JOINS

INNER JOIN returns only matching rows from both tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
INNER JOIN Departments d ON e.DepartmentID = d.DepartmentID;

LEFT JOIN (or LEFT OUTER JOIN) returns all rows from the left table, and matched rows from the right table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
LEFT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

RIGHT JOIN (or RIGHT OUTER JOIN) returns all rows from the right table, and matched rows from the left table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
RIGHT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

FULL JOIN (or FULL OUTER JOIN) returns rows when there is a match in one of the tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
FULL JOIN Departments d ON e.DepartmentID = d.DepartmentID;


GROUPING AND AGGREGATIONS

GROUP BY groups rows sharing the same value in specified columns into summary rows.

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department;

HAVING filters groups based on a condition (used with GROUP BY)

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department 
HAVING AVG(Salary) > 60000;


WINDOW FUNCTIONS

ROW_NUMBER() assigns a unique number to each row based on a specified order.

SELECT Name, Salary, ROW_NUMBER() OVER (ORDER BY Salary DESC) AS RowNum
FROM Employees;

RANK() assigns a rank to each row within the partition of a result set.

SELECT Name, Salary, RANK() OVER (PARTITION BY Department ORDER BY Salary DESC) AS Rank
FROM Employees;

SUM() OVER() calculates a running total or cumulative sum.

SELECT Name, Salary, SUM(Salary) OVER (ORDER BY Salary) AS CumulativeSalary
FROM Employees;


COMMON TABLE EXPRESSIONS

WITH Clause: Defines a temporary result set that you can reference within a SELECT, INSERT, UPDATE, or DELETE statement.

WITH DepartmentSalaries AS (
    SELECT Department, AVG(Salary) AS AvgSalary
    FROM Employees
    GROUP BY Department
)
SELECT Department, AvgSalary
FROM DepartmentSalaries
WHERE AvgSalary > 60000;


SET OPERATIONS

UNION combines results from multiple SELECT statements, removing duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION
SELECT Name FROM Employees WHERE Department = 'Marketing';

UNION ALL combines results from multiple SELECT statements, including duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION ALL
SELECT Name FROM Employees WHERE Department = 'Marketing';

INTERSECT returns common rows from multiple SELECT statements.

SELECT Name FROM Employees WHERE Department = 'Sales'
INTERSECT
SELECT Name FROM Employees WHERE Salary > 50000;

EXCEPT returns rows from the first SELECT statement that are not in the second SELECT statement.

SELECT Name FROM Employees WHERE Department = 'Sales'
EXCEPT
SELECT Name FROM Employees WHERE Salary < 40000;

SHARE