Intermediate SQL

Master intermediate SQL with techniques on filtering, joins, subqueries, aggregations, and more. Enhance your data querying skills!

Saartje Ly

Data Engineering Intern

August 7, 2024

INTRO

Let's say we have a table called Employees, and a table called Departments.



ADVANCED FILTERING

IN Operator matches any value within a specified list. SELECT * FROM Employees WHERE Department IN ('Sales', 'Marketing');

BETWEEN Operator filters data within a range. SELECT * FROM Employees WHERE Salary BETWEEN 50000 AND 70000;

LIKE Operator performs pattern matching with wildcards. SELECT * FROM Employees WHERE Name LIKE 'J%'; -- Names starting with 'J'.


SUBQUERIES

Single-row Subqueries return a single value. SELECT Name FROM Employees WHERE Salary = (SELECT MAX(Salary) FROM Employees);

Multiple-row Subqueries return multiple rows. SELECT Name FROM Employees WHERE Department IN (SELECT Department FROM Departments WHERE Location = 'New York');

Correlated Subqueries reference columns from the outer query. SELECT Name FROM Employees e1 WHERE Salary > (SELECT AVG(Salary) FROM Employees e2 WHERE e1.Department = e2.Department);


JOINS

INNER JOIN returns only matching rows from both tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
INNER JOIN Departments d ON e.DepartmentID = d.DepartmentID;

LEFT JOIN (or LEFT OUTER JOIN) returns all rows from the left table, and matched rows from the right table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
LEFT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

RIGHT JOIN (or RIGHT OUTER JOIN) returns all rows from the right table, and matched rows from the left table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
RIGHT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

FULL JOIN (or FULL OUTER JOIN) returns rows when there is a match in one of the tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
FULL JOIN Departments d ON e.DepartmentID = d.DepartmentID;


GROUPING AND AGGREGATIONS

GROUP BY groups rows sharing the same value in specified columns into summary rows.

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department;

HAVING filters groups based on a condition (used with GROUP BY)

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department 
HAVING AVG(Salary) > 60000;


WINDOW FUNCTIONS

ROW_NUMBER() assigns a unique number to each row based on a specified order.

SELECT Name, Salary, ROW_NUMBER() OVER (ORDER BY Salary DESC) AS RowNum
FROM Employees;

RANK() assigns a rank to each row within the partition of a result set.

SELECT Name, Salary, RANK() OVER (PARTITION BY Department ORDER BY Salary DESC) AS Rank
FROM Employees;

SUM() OVER() calculates a running total or cumulative sum.

SELECT Name, Salary, SUM(Salary) OVER (ORDER BY Salary) AS CumulativeSalary
FROM Employees;


COMMON TABLE EXPRESSIONS

WITH Clause: Defines a temporary result set that you can reference within a SELECT, INSERT, UPDATE, or DELETE statement.

WITH DepartmentSalaries AS (
    SELECT Department, AVG(Salary) AS AvgSalary
    FROM Employees
    GROUP BY Department
)
SELECT Department, AvgSalary
FROM DepartmentSalaries
WHERE AvgSalary > 60000;


SET OPERATIONS

UNION combines results from multiple SELECT statements, removing duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION
SELECT Name FROM Employees WHERE Department = 'Marketing';

UNION ALL combines results from multiple SELECT statements, including duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION ALL
SELECT Name FROM Employees WHERE Department = 'Marketing';

INTERSECT returns common rows from multiple SELECT statements.

SELECT Name FROM Employees WHERE Department = 'Sales'
INTERSECT
SELECT Name FROM Employees WHERE Salary > 50000;

EXCEPT returns rows from the first SELECT statement that are not in the second SELECT statement.

SELECT Name FROM Employees WHERE Department = 'Sales'
EXCEPT
SELECT Name FROM Employees WHERE Salary < 40000;

INTRO

Let's say we have a table called Employees, and a table called Departments.



ADVANCED FILTERING

IN Operator matches any value within a specified list. SELECT * FROM Employees WHERE Department IN ('Sales', 'Marketing');

BETWEEN Operator filters data within a range. SELECT * FROM Employees WHERE Salary BETWEEN 50000 AND 70000;

LIKE Operator performs pattern matching with wildcards. SELECT * FROM Employees WHERE Name LIKE 'J%'; -- Names starting with 'J'.


SUBQUERIES

Single-row Subqueries return a single value. SELECT Name FROM Employees WHERE Salary = (SELECT MAX(Salary) FROM Employees);

Multiple-row Subqueries return multiple rows. SELECT Name FROM Employees WHERE Department IN (SELECT Department FROM Departments WHERE Location = 'New York');

Correlated Subqueries reference columns from the outer query. SELECT Name FROM Employees e1 WHERE Salary > (SELECT AVG(Salary) FROM Employees e2 WHERE e1.Department = e2.Department);


JOINS

INNER JOIN returns only matching rows from both tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
INNER JOIN Departments d ON e.DepartmentID = d.DepartmentID;

LEFT JOIN (or LEFT OUTER JOIN) returns all rows from the left table, and matched rows from the right table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
LEFT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

RIGHT JOIN (or RIGHT OUTER JOIN) returns all rows from the right table, and matched rows from the left table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
RIGHT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

FULL JOIN (or FULL OUTER JOIN) returns rows when there is a match in one of the tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
FULL JOIN Departments d ON e.DepartmentID = d.DepartmentID;


GROUPING AND AGGREGATIONS

GROUP BY groups rows sharing the same value in specified columns into summary rows.

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department;

HAVING filters groups based on a condition (used with GROUP BY)

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department 
HAVING AVG(Salary) > 60000;


WINDOW FUNCTIONS

ROW_NUMBER() assigns a unique number to each row based on a specified order.

SELECT Name, Salary, ROW_NUMBER() OVER (ORDER BY Salary DESC) AS RowNum
FROM Employees;

RANK() assigns a rank to each row within the partition of a result set.

SELECT Name, Salary, RANK() OVER (PARTITION BY Department ORDER BY Salary DESC) AS Rank
FROM Employees;

SUM() OVER() calculates a running total or cumulative sum.

SELECT Name, Salary, SUM(Salary) OVER (ORDER BY Salary) AS CumulativeSalary
FROM Employees;


COMMON TABLE EXPRESSIONS

WITH Clause: Defines a temporary result set that you can reference within a SELECT, INSERT, UPDATE, or DELETE statement.

WITH DepartmentSalaries AS (
    SELECT Department, AVG(Salary) AS AvgSalary
    FROM Employees
    GROUP BY Department
)
SELECT Department, AvgSalary
FROM DepartmentSalaries
WHERE AvgSalary > 60000;


SET OPERATIONS

UNION combines results from multiple SELECT statements, removing duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION
SELECT Name FROM Employees WHERE Department = 'Marketing';

UNION ALL combines results from multiple SELECT statements, including duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION ALL
SELECT Name FROM Employees WHERE Department = 'Marketing';

INTERSECT returns common rows from multiple SELECT statements.

SELECT Name FROM Employees WHERE Department = 'Sales'
INTERSECT
SELECT Name FROM Employees WHERE Salary > 50000;

EXCEPT returns rows from the first SELECT statement that are not in the second SELECT statement.

SELECT Name FROM Employees WHERE Department = 'Sales'
EXCEPT
SELECT Name FROM Employees WHERE Salary < 40000;

INTRO

Let's say we have a table called Employees, and a table called Departments.



ADVANCED FILTERING

IN Operator matches any value within a specified list. SELECT * FROM Employees WHERE Department IN ('Sales', 'Marketing');

BETWEEN Operator filters data within a range. SELECT * FROM Employees WHERE Salary BETWEEN 50000 AND 70000;

LIKE Operator performs pattern matching with wildcards. SELECT * FROM Employees WHERE Name LIKE 'J%'; -- Names starting with 'J'.


SUBQUERIES

Single-row Subqueries return a single value. SELECT Name FROM Employees WHERE Salary = (SELECT MAX(Salary) FROM Employees);

Multiple-row Subqueries return multiple rows. SELECT Name FROM Employees WHERE Department IN (SELECT Department FROM Departments WHERE Location = 'New York');

Correlated Subqueries reference columns from the outer query. SELECT Name FROM Employees e1 WHERE Salary > (SELECT AVG(Salary) FROM Employees e2 WHERE e1.Department = e2.Department);


JOINS

INNER JOIN returns only matching rows from both tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
INNER JOIN Departments d ON e.DepartmentID = d.DepartmentID;

LEFT JOIN (or LEFT OUTER JOIN) returns all rows from the left table, and matched rows from the right table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
LEFT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

RIGHT JOIN (or RIGHT OUTER JOIN) returns all rows from the right table, and matched rows from the left table.

SELECT e.Name, d.DepartmentName 
FROM Employees e
RIGHT JOIN Departments d ON e.DepartmentID = d.DepartmentID;

FULL JOIN (or FULL OUTER JOIN) returns rows when there is a match in one of the tables.

SELECT e.Name, d.DepartmentName 
FROM Employees e
FULL JOIN Departments d ON e.DepartmentID = d.DepartmentID;


GROUPING AND AGGREGATIONS

GROUP BY groups rows sharing the same value in specified columns into summary rows.

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department;

HAVING filters groups based on a condition (used with GROUP BY)

SELECT Department, AVG(Salary) AS AvgSalary 
FROM Employees 
GROUP BY Department 
HAVING AVG(Salary) > 60000;


WINDOW FUNCTIONS

ROW_NUMBER() assigns a unique number to each row based on a specified order.

SELECT Name, Salary, ROW_NUMBER() OVER (ORDER BY Salary DESC) AS RowNum
FROM Employees;

RANK() assigns a rank to each row within the partition of a result set.

SELECT Name, Salary, RANK() OVER (PARTITION BY Department ORDER BY Salary DESC) AS Rank
FROM Employees;

SUM() OVER() calculates a running total or cumulative sum.

SELECT Name, Salary, SUM(Salary) OVER (ORDER BY Salary) AS CumulativeSalary
FROM Employees;


COMMON TABLE EXPRESSIONS

WITH Clause: Defines a temporary result set that you can reference within a SELECT, INSERT, UPDATE, or DELETE statement.

WITH DepartmentSalaries AS (
    SELECT Department, AVG(Salary) AS AvgSalary
    FROM Employees
    GROUP BY Department
)
SELECT Department, AvgSalary
FROM DepartmentSalaries
WHERE AvgSalary > 60000;


SET OPERATIONS

UNION combines results from multiple SELECT statements, removing duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION
SELECT Name FROM Employees WHERE Department = 'Marketing';

UNION ALL combines results from multiple SELECT statements, including duplicates.

SELECT Name FROM Employees WHERE Department = 'Sales'
UNION ALL
SELECT Name FROM Employees WHERE Department = 'Marketing';

INTERSECT returns common rows from multiple SELECT statements.

SELECT Name FROM Employees WHERE Department = 'Sales'
INTERSECT
SELECT Name FROM Employees WHERE Salary > 50000;

EXCEPT returns rows from the first SELECT statement that are not in the second SELECT statement.

SELECT Name FROM Employees WHERE Department = 'Sales'
EXCEPT
SELECT Name FROM Employees WHERE Salary < 40000;

SHARE