Harith Zahid

SQL Operations Fundamentals

A beginner-friendly guide to essential SQL operations using Knex.js and MySQL.


Table of Contents

  1. CRUD Operations Basics
  2. Reading Data with SELECT
  3. JOINs - Combining Tables
  4. Aggregate Functions
  5. GROUP BY and HAVING
  6. Subqueries and CTEs
  7. Real-World Examples
  8. Quick Reference

1. CRUD Operations Basics

CRUD stands for Create, Read, Update, Delete - the four basic operations for working with data.

Create - Adding Data

// Insert a single row
async function createUser(name, email) {
  const [userId] = await knex('users').insert({
    name: name,
    email: email,
    balance: 0
  });
  
  return userId; // Returns the new ID
}

// Insert multiple rows at once
async function createMultipleUsers(users) {
  const ids = await knex('users').insert([
    { name: 'Alice', email: 'alice@example.com' },
    { name: 'Bob', email: 'bob@example.com' },
    { name: 'Charlie', email: 'charlie@example.com' }
  ]);
  
  return ids; // Returns array of IDs
}

// Insert and return the created record
async function createUserAndReturn(name, email) {
  const [user] = await knex('users')
    .insert({ name, email })
    .returning(['id', 'name', 'email']); // PostgreSQL only
  
  // For MySQL, query separately:
  const [id] = await knex('users').insert({ name, email });
  const user = await knex('users').where('id', id).first();
  
  return user;
}

Read - Retrieving Data

// Get all rows
async function getAllUsers() {
  return knex('users').select('*');
}

// Get specific columns
async function getUserEmails() {
  return knex('users').select('id', 'email');
}

// Get one row
async function getUserById(userId) {
  return knex('users')
    .where('id', userId)
    .first(); // Returns single object or undefined
}

// Get with conditions
async function getActiveUsers() {
  return knex('users')
    .where('status', 'active')
    .andWhere('balance', '>', 0)
    .select('*');
}

// Get with multiple conditions
async function searchUsers(searchTerm) {
  return knex('users')
    .where('name', 'like', `%${searchTerm}%`)
    .orWhere('email', 'like', `%${searchTerm}%`)
    .select('*');
}

Update - Modifying Data

// Update specific rows
async function updateUserEmail(userId, newEmail) {
  const count = await knex('users')
    .where('id', userId)
    .update({ email: newEmail });
  
  return count; // Number of rows updated
}

// Update multiple columns
async function updateUserProfile(userId, data) {
  return knex('users')
    .where('id', userId)
    .update({
      name: data.name,
      email: data.email,
      updated_at: knex.fn.now()
    });
}

// Increment/Decrement
async function addBalance(userId, amount) {
  return knex('users')
    .where('id', userId)
    .increment('balance', amount);
}

async function subtractBalance(userId, amount) {
  return knex('users')
    .where('id', userId)
    .decrement('balance', amount);
}

// Conditional update
async function activateUser(userId) {
  return knex('users')
    .where('id', userId)
    .andWhere('status', 'pending')
    .update({ status: 'active' });
}

Delete - Removing Data

// Delete specific rows
async function deleteUser(userId) {
  const count = await knex('users')
    .where('id', userId)
    .del(); // or .delete()
  
  return count; // Number of rows deleted
}

// Delete with conditions
async function deleteInactiveUsers() {
  return knex('users')
    .where('status', 'inactive')
    .andWhere('last_login', '<', knex.raw('DATE_SUB(NOW(), INTERVAL 1 YEAR)'))
    .del();
}

// Soft delete (recommended for important data)
async function softDeleteUser(userId) {
  return knex('users')
    .where('id', userId)
    .update({ deleted_at: knex.fn.now() });
}

// Get non-deleted users
async function getActiveUsersOnly() {
  return knex('users')
    .whereNull('deleted_at')
    .select('*');
}

2. Reading Data with SELECT

Master the art of querying data effectively.

Basic Selection

// Select all columns
await knex('products').select('*');

// Select specific columns (better performance)
await knex('products').select('id', 'name', 'price');

// Select with alias
await knex('products')
  .select('id', 'name', 'price as product_price');

Filtering with WHERE

// Single condition
await knex('products')
  .where('category', 'Electronics');

// Multiple AND conditions
await knex('products')
  .where('category', 'Electronics')
  .andWhere('price', '<', 1000);

// OR conditions
await knex('products')
  .where('category', 'Electronics')
  .orWhere('category', 'Computers');

// IN clause
await knex('products')
  .whereIn('category', ['Electronics', 'Computers', 'Gaming']);

// BETWEEN
await knex('products')
  .whereBetween('price', [100, 500]);

// NULL checks
await knex('products')
  .whereNull('discontinued_at');

await knex('products')
  .whereNotNull('discontinued_at');

// LIKE for pattern matching
await knex('products')
  .where('name', 'like', '%laptop%');

Sorting and Limiting

// Sort ascending
await knex('products')
  .orderBy('price', 'asc');

// Sort descending
await knex('products')
  .orderBy('created_at', 'desc');

// Multiple sort columns
await knex('products')
  .orderBy([
    { column: 'category', order: 'asc' },
    { column: 'price', order: 'desc' }
  ]);

// Limit results
await knex('products')
  .limit(10);

// Pagination
async function getProducts(page = 1, perPage = 20) {
  const offset = (page - 1) * perPage;
  
  return knex('products')
    .select('*')
    .limit(perPage)
    .offset(offset)
    .orderBy('id');
}

3. JOINs - Combining Tables

JOINs let you combine data from multiple tables based on relationships.

Setup Example Tables

// Assume we have these tables:
// - customers (id, name, email)
// - orders (id, customer_id, total, order_date)
// - order_items (id, order_id, product_id, quantity)
// - products (id, name, price)

INNER JOIN - Only Matching Rows

Returns rows that have matches in both tables.

// Get orders with customer names
async function getOrdersWithCustomers() {
  return knex('orders as o')
    .innerJoin('customers as c', 'o.customer_id', 'c.id')
    .select(
      'o.id as order_id',
      'o.total',
      'o.order_date',
      'c.name as customer_name',
      'c.email as customer_email'
    );
}

// Equivalent SQL:
// SELECT o.id as order_id, o.total, o.order_date, 
//        c.name as customer_name, c.email as customer_email
// FROM orders o
// INNER JOIN customers c ON o.customer_id = c.id

Use INNER JOIN when: You only want rows that have matching data in both tables.

LEFT JOIN - All from Left Table

Returns all rows from the left table, with matches from the right table (or NULL if no match).

// Get all customers and their orders (including customers with no orders)
async function getCustomersWithOrders() {
  return knex('customers as c')
    .leftJoin('orders as o', 'c.id', 'o.customer_id')
    .select(
      'c.id as customer_id',
      'c.name',
      'c.email',
      'o.id as order_id',
      'o.total',
      'o.order_date'
    )
    .orderBy('c.id');
}

// Result includes customers with no orders (order fields will be NULL)

Use LEFT JOIN when: You want all records from the main table, even if they don't have related records.

RIGHT JOIN - All from Right Table

Returns all rows from the right table, with matches from the left table (or NULL if no match).

// Get all orders and their customers (including orders with no customer)
async function getOrdersWithCustomersRight() {
  return knex('customers as c')
    .rightJoin('orders as o', 'c.id', 'o.customer_id')
    .select(
      'o.id as order_id',
      'o.total',
      'c.name as customer_name'
    );
}

Tip: RIGHT JOIN is rarely used - you can usually rewrite it as a LEFT JOIN by swapping the tables.

Multiple JOINs

// Get order items with product and customer details
async function getOrderDetails(orderId) {
  return knex('order_items as oi')
    .innerJoin('orders as o', 'oi.order_id', 'o.id')
    .innerJoin('products as p', 'oi.product_id', 'p.id')
    .innerJoin('customers as c', 'o.customer_id', 'c.id')
    .select(
      'c.name as customer_name',
      'p.name as product_name',
      'p.price',
      'oi.quantity',
      knex.raw('p.price * oi.quantity as subtotal')
    )
    .where('o.id', orderId);
}

JOIN Comparison

JOIN Type Returns
INNER JOIN Only rows with matches in both tables
LEFT JOIN All rows from left table + matches from right
RIGHT JOIN All rows from right table + matches from left
FULL OUTER JOIN All rows from both tables (not supported in MySQL)

4. Aggregate Functions

Aggregate functions perform calculations on multiple rows and return a single result.

COUNT - Counting Rows

// Count all rows
async function getTotalUsers() {
  const result = await knex('users').count('* as total').first();
  return result.total;
}

// Count specific column (ignores NULL)
async function getVerifiedUsers() {
  const result = await knex('users')
    .count('email_verified_at as verified_count')
    .first();
  return result.verified_count;
}

// Count distinct values
async function getUniqueCategories() {
  const result = await knex('products')
    .countDistinct('category as category_count')
    .first();
  return result.category_count;
}

// Count with condition
async function getActiveUserCount() {
  const result = await knex('users')
    .where('status', 'active')
    .count('* as count')
    .first();
  return result.count;
}

SUM - Adding Values

// Sum of a column
async function getTotalRevenue() {
  const result = await knex('orders')
    .sum('total as revenue')
    .first();
  return result.revenue || 0;
}

// Sum with condition
async function getMonthlyRevenue(year, month) {
  const result = await knex('orders')
    .sum('total as revenue')
    .whereRaw('YEAR(order_date) = ?', [year])
    .andWhereRaw('MONTH(order_date) = ?', [month])
    .first();
  return result.revenue || 0;
}

AVG - Average Value

// Average order value
async function getAverageOrderValue() {
  const result = await knex('orders')
    .avg('total as average')
    .first();
  return result.average;
}

// Average with rounding
async function getAveragePrice() {
  const result = await knex('products')
    .avg('price as avg_price')
    .first();
  return Math.round(result.avg_price * 100) / 100;
}

MAX and MIN - Highest and Lowest

// Find highest and lowest prices
async function getPriceRange() {
  const result = await knex('products')
    .max('price as max_price')
    .min('price as min_price')
    .first();
  
  return {
    highest: result.max_price,
    lowest: result.min_price
  };
}

// Most recent order
async function getLatestOrder() {
  const result = await knex('orders')
    .max('order_date as latest_date')
    .first();
  return result.latest_date;
}

Multiple Aggregates at Once

// Get order statistics
async function getOrderStats() {
  const result = await knex('orders')
    .count('* as total_orders')
    .sum('total as total_revenue')
    .avg('total as avg_order_value')
    .max('total as largest_order')
    .min('total as smallest_order')
    .first();
  
  return result;
}

5. GROUP BY and HAVING

GROUP BY lets you group rows and calculate aggregates for each group.

Basic GROUP BY

// Count orders per customer
async function getOrderCountByCustomer() {
  return knex('orders')
    .select('customer_id')
    .count('* as order_count')
    .groupBy('customer_id');
}

// Revenue per customer
async function getRevenueByCustomer() {
  return knex('orders as o')
    .innerJoin('customers as c', 'o.customer_id', 'c.id')
    .select(
      'c.id as customer_id',
      'c.name as customer_name'
    )
    .sum('o.total as total_spent')
    .groupBy('c.id', 'c.name')
    .orderBy('total_spent', 'desc');
}

Multiple Grouping Columns

// Orders by customer and status
async function getOrdersByCustomerAndStatus() {
  return knex('orders')
    .select('customer_id', 'status')
    .count('* as count')
    .groupBy('customer_id', 'status')
    .orderBy(['customer_id', 'status']);
}

// Sales by category and month
async function getSalesByCategory() {
  return knex('order_items as oi')
    .innerJoin('products as p', 'oi.product_id', 'p.id')
    .select(
      'p.category',
      knex.raw('DATE_FORMAT(oi.created_at, "%Y-%m") as month')
    )
    .sum('oi.quantity as units_sold')
    .sum(knex.raw('oi.quantity * oi.price as revenue'))
    .groupBy('p.category', knex.raw('DATE_FORMAT(oi.created_at, "%Y-%m")'))
    .orderBy('month', 'desc');
}

HAVING - Filter Groups

HAVING is like WHERE, but for grouped data. Use it to filter groups after aggregation.

// Customers who spent more than $1000
async function getHighValueCustomers() {
  return knex('orders as o')
    .innerJoin('customers as c', 'o.customer_id', 'c.id')
    .select('c.id', 'c.name')
    .sum('o.total as total_spent')
    .groupBy('c.id', 'c.name')
    .having('total_spent', '>', 1000)
    .orderBy('total_spent', 'desc');
}

// Customers with more than 5 orders
async function getFrequentCustomers() {
  return knex('orders as o')
    .innerJoin('customers as c', 'o.customer_id', 'c.id')
    .select('c.id', 'c.name')
    .count('o.id as order_count')
    .groupBy('c.id', 'c.name')
    .having('order_count', '>', 5);
}

// Products with average rating above 4.5
async function getTopRatedProducts() {
  return knex('product_reviews as r')
    .innerJoin('products as p', 'r.product_id', 'p.id')
    .select('p.id', 'p.name')
    .avg('r.rating as avg_rating')
    .count('r.id as review_count')
    .groupBy('p.id', 'p.name')
    .having('avg_rating', '>', 4.5)
    .andHaving('review_count', '>=', 10); // At least 10 reviews
}

WHERE vs HAVING

// ✅ Correct: Filter rows THEN group
async function getRevenueByActiveCustomers() {
  return knex('orders as o')
    .innerJoin('customers as c', 'o.customer_id', 'c.id')
    .select('c.id', 'c.name')
    .sum('o.total as revenue')
    .where('c.status', 'active')    // WHERE filters rows before grouping
    .groupBy('c.id', 'c.name')
    .having('revenue', '>', 500);    // HAVING filters groups after aggregation
}

Key Difference:

  • WHERE filters individual rows before grouping
  • HAVING filters groups after aggregation

6. Subqueries and CTEs

Subqueries - Query Within a Query

// Find customers who have never ordered
async function getCustomersWithNoOrders() {
  return knex('customers')
    .whereNotIn('id', function() {
      this.select('customer_id').from('orders');
    })
    .select('*');
}

// Products more expensive than average
async function getExpensiveProducts() {
  return knex('products')
    .where('price', '>', function() {
      this.avg('price').from('products');
    })
    .select('name', 'price');
}

// Customers who spent more than the average customer
async function getAboveAverageSpenders() {
  return knex('customers as c')
    .innerJoin('orders as o', 'c.id', 'o.customer_id')
    .select('c.id', 'c.name')
    .sum('o.total as total_spent')
    .groupBy('c.id', 'c.name')
    .having('total_spent', '>', function() {
      this.avg('customer_total')
        .from(
          knex('orders')
            .select('customer_id')
            .sum('total as customer_total')
            .groupBy('customer_id')
            .as('customer_totals')
        );
    });
}

CTEs (Common Table Expressions) - Named Subqueries

CTEs make complex queries more readable by naming intermediate results.

// Using raw SQL for CTE (Knex doesn't have built-in CTE support)
async function getTopCustomers() {
  return knex.raw(`
    WITH customer_stats AS (
      SELECT 
        c.id,
        c.name,
        COUNT(o.id) as order_count,
        SUM(o.total) as total_spent
      FROM customers c
      LEFT JOIN orders o ON c.id = o.customer_id
      GROUP BY c.id, c.name
    )
    SELECT *
    FROM customer_stats
    WHERE total_spent > 1000
    ORDER BY total_spent DESC
  `).then(result => result[0]);
}

// Multiple CTEs
async function getProductPerformance() {
  return knex.raw(`
    WITH 
    product_sales AS (
      SELECT 
        product_id,
        SUM(quantity) as units_sold,
        SUM(quantity * price) as revenue
      FROM order_items
      GROUP BY product_id
    ),
    product_reviews AS (
      SELECT 
        product_id,
        AVG(rating) as avg_rating,
        COUNT(*) as review_count
      FROM product_reviews
      GROUP BY product_id
    )
    SELECT 
      p.id,
      p.name,
      ps.units_sold,
      ps.revenue,
      pr.avg_rating,
      pr.review_count
    FROM products p
    LEFT JOIN product_sales ps ON p.id = ps.product_id
    LEFT JOIN product_reviews pr ON p.id = pr.product_id
    ORDER BY ps.revenue DESC
  `).then(result => result[0]);
}

When to Use Subqueries vs JOINs

Use Subqueries when:

  • Checking if something exists/doesn't exist
  • Comparing to an aggregate value
  • The subquery is simple and clear

Use JOINs when:

  • Combining columns from multiple tables
  • Better performance with proper indexes
  • The relationship is straightforward

7. Real-World Examples

Example 1: Customer Analytics Dashboard

async function getCustomerAnalytics(customerId) {
  // Get customer with order statistics
  const customerStats = await knex('customers as c')
    .leftJoin('orders as o', 'c.id', 'o.customer_id')
    .select(
      'c.id',
      'c.name',
      'c.email'
    )
    .count('o.id as total_orders')
    .sum('o.total as total_spent')
    .avg('o.total as avg_order_value')
    .max('o.order_date as last_order_date')
    .where('c.id', customerId)
    .groupBy('c.id', 'c.name', 'c.email')
    .first();
  
  // Get favorite products
  const favoriteProducts = await knex('order_items as oi')
    .innerJoin('orders as o', 'oi.order_id', 'o.id')
    .innerJoin('products as p', 'oi.product_id', 'p.id')
    .select('p.name as product_name')
    .sum('oi.quantity as times_purchased')
    .where('o.customer_id', customerId)
    .groupBy('p.id', 'p.name')
    .orderBy('times_purchased', 'desc')
    .limit(5);
  
  return {
    ...customerStats,
    favorite_products: favoriteProducts
  };
}

Example 2: Sales Report

async function getMonthlySalesReport(year) {
  return knex.raw(`
    WITH monthly_sales AS (
      SELECT 
        MONTH(order_date) as month,
        COUNT(*) as order_count,
        SUM(total) as revenue,
        AVG(total) as avg_order_value
      FROM orders
      WHERE YEAR(order_date) = ?
        AND status = 'completed'
      GROUP BY MONTH(order_date)
    )
    SELECT 
      month,
      order_count,
      revenue,
      avg_order_value,
      revenue - LAG(revenue) OVER (ORDER BY month) as revenue_growth
    FROM monthly_sales
    ORDER BY month
  `, [year]).then(result => result[0]);
}

Example 3: Product Search with Filters

async function searchProducts(filters) {
  let query = knex('products as p')
    .leftJoin('product_reviews as r', 'p.id', 'r.product_id')
    .select(
      'p.id',
      'p.name',
      'p.price',
      'p.category'
    )
    .avg('r.rating as avg_rating')
    .count('r.id as review_count')
    .groupBy('p.id', 'p.name', 'p.price', 'p.category');
  
  // Apply filters
  if (filters.category) {
    query = query.where('p.category', filters.category);
  }
  
  if (filters.minPrice) {
    query = query.where('p.price', '>=', filters.minPrice);
  }
  
  if (filters.maxPrice) {
    query = query.where('p.price', '<=', filters.maxPrice);
  }
  
  if (filters.search) {
    query = query.where('p.name', 'like', `%${filters.search}%`);
  }
  
  if (filters.minRating) {
    query = query.having('avg_rating', '>=', filters.minRating);
  }
  
  // Sort
  if (filters.sortBy === 'price_asc') {
    query = query.orderBy('p.price', 'asc');
  } else if (filters.sortBy === 'price_desc') {
    query = query.orderBy('p.price', 'desc');
  } else if (filters.sortBy === 'rating') {
    query = query.orderBy('avg_rating', 'desc');
  }
  
  // Pagination
  const page = filters.page || 1;
  const perPage = filters.perPage || 20;
  const offset = (page - 1) * perPage;
  
  query = query.limit(perPage).offset(offset);
  
  return query;
}

8. Quick Reference

CRUD Cheat Sheet

Operation Knex Method Returns
Create .insert() Array of IDs
Read .select() Array of rows
Update .update() Count of updated rows
Delete .del() Count of deleted rows

JOIN Quick Guide

// INNER JOIN - Only matches
knex('orders')
  .innerJoin('customers', 'orders.customer_id', 'customers.id')

// LEFT JOIN - All from left table
knex('customers')
  .leftJoin('orders', 'customers.id', 'orders.customer_id')

// RIGHT JOIN - All from right table  
knex('customers')
  .rightJoin('orders', 'customers.id', 'orders.customer_id')

Aggregate Functions

Function Purpose Example
COUNT Count rows .count('* as total')
SUM Add values .sum('price as total_price')
AVG Average value .avg('rating as avg_rating')
MAX Highest value .max('price as max_price')
MIN Lowest value .min('price as min_price')

WHERE vs HAVING

// WHERE - Filter rows before grouping
knex('orders')
  .where('status', 'completed')    // Filter individual rows
  .groupBy('customer_id')
  .count('* as order_count')

// HAVING - Filter groups after aggregation
knex('orders')
  .groupBy('customer_id')
  .count('* as order_count')
  .having('order_count', '>', 5)   // Filter grouped results

Common Patterns

Pagination:

const page = 1;
const perPage = 20;
const offset = (page - 1) * perPage;

knex('products')
  .limit(perPage)
  .offset(offset)
  .orderBy('id');

Search:

knex('products')
  .where('name', 'like', `%${searchTerm}%`)
  .orWhere('description', 'like', `%${searchTerm}%`);

Date Ranges:

knex('orders')
  .whereBetween('order_date', [startDate, endDate]);

Top N per Group:

knex.raw(`
  SELECT * FROM (
    SELECT 
      *,
      ROW_NUMBER() OVER (PARTITION BY category ORDER BY sales DESC) as rank
    FROM products
  ) ranked
  WHERE rank <= 5
`);

The Golden Rules

  1. Select specific columns - Avoid SELECT * in production
  2. Use JOINs over subqueries - Usually faster with proper indexes
  3. Filter before grouping - Use WHERE to reduce data early
  4. Add LIMIT to large queries - Always paginate results
  5. Use aggregate functions - Let the database do calculations
  6. Index JOIN columns - Foreign keys should have indexes
  7. Test with EXPLAIN - Check query performance

Common Query Patterns

Pattern 1: Find Records Without Related Data

// Customers with no orders
knex('customers')
  .whereNotIn('id', knex('orders').select('customer_id'))
  .select('*');

Pattern 2: Latest Record Per Group

// Latest order for each customer
knex.raw(`
  SELECT o.*
  FROM orders o
  INNER JOIN (
    SELECT customer_id, MAX(order_date) as max_date
    FROM orders
    GROUP BY customer_id
  ) latest ON o.customer_id = latest.customer_id 
    AND o.order_date = latest.max_date
`);

Pattern 3: Running Totals

// Cumulative revenue by month
knex.raw(`
  SELECT 
    month,
    revenue,
    SUM(revenue) OVER (ORDER BY month) as cumulative_revenue
  FROM monthly_sales
  ORDER BY month
`);

Next Steps

  1. Practice writing queries for each CRUD operation
  2. Experiment with different JOIN types
  3. Use aggregate functions to analyze your data
  4. Combine GROUP BY with HAVING for filtered summaries
  5. Try writing complex queries with CTEs
  6. Always check query performance with EXPLAIN

Remember: SQL is about telling the database what you want, not how to get it. Focus on clear, readable queries first, then optimize for performance if needed!