Temporal, Funnel, Retention Drills
State the time boundary before opening the answer.
1. Compute new users by signup date.
SELECT signup_date, COUNT(*) AS new_users
FROM customers
GROUP BY signup_date;
If signup_date is derived from timestamp, use the engine’s date conversion consistently with timezone policy.
2. Compute daily active users from events.
SELECT event_date, COUNT(DISTINCT user_id) AS dau
FROM events
GROUP BY event_date;
Define bot filtering and identity resolution before productionizing.
3. Compute 7-day rolling active users.
WITH user_days AS (
SELECT DISTINCT user_id, event_date
FROM events
),
rolling AS (
SELECT
d.calendar_date,
u.user_id
FROM date_spine d
JOIN user_days u
ON u.event_date <= d.calendar_date
AND u.event_date > d.calendar_date - INTERVAL '7' DAY
)
SELECT calendar_date, COUNT(DISTINCT user_id) AS wau
FROM rolling
GROUP BY calendar_date;
This can be expensive at scale; pre-aggregate user-day activity or use sketches if approximate counts are acceptable.
4. Compute signup-to-first-purchase conversion within 14 days.
WITH first_purchase AS (
SELECT customer_id, MIN(order_ts) AS first_purchase_ts
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
)
SELECT
COUNT(*) AS signups,
SUM(
CASE
WHEN f.first_purchase_ts >= c.signup_ts
AND f.first_purchase_ts < c.signup_ts + INTERVAL '14' DAY
THEN 1 ELSE 0
END
) AS converted_14d
FROM customers c
LEFT JOIN first_purchase f
ON f.customer_id = c.customer_id;
5. Build monthly retention from first activity month.
WITH first_seen AS (
SELECT user_id, MIN(event_month) AS cohort_month
FROM events
GROUP BY user_id
),
activity AS (
SELECT DISTINCT user_id, event_month
FROM events
)
SELECT
f.cohort_month,
a.event_month,
COUNT(*) AS retained_users
FROM first_seen f
JOIN activity a
ON a.user_id = f.user_id
AND a.event_month >= f.cohort_month
GROUP BY f.cohort_month, a.event_month;
Compute cohort age with engine-specific date diff.
6. Find users active yesterday but not today.
WITH yesterday AS (
SELECT DISTINCT user_id
FROM events
WHERE event_date = DATE '2026-06-30'
),
today AS (
SELECT DISTINCT user_id
FROM events
WHERE event_date = DATE '2026-07-01'
)
SELECT y.user_id
FROM yesterday y
WHERE NOT EXISTS (
SELECT 1
FROM today t
WHERE t.user_id = y.user_id
);
7. Find reactivated users: inactive for 30 days, then active today.
WITH today AS (
SELECT DISTINCT user_id
FROM events
WHERE event_date = DATE '2026-07-01'
)
SELECT t.user_id
FROM today t
WHERE NOT EXISTS (
SELECT 1
FROM events e
WHERE e.user_id = t.user_id
AND e.event_date >= DATE '2026-06-01'
AND e.event_date < DATE '2026-07-01'
);
This uses a simple fixed 30-day lookback.
8. Compute first event after signup for every customer.
WITH ranked AS (
SELECT
c.customer_id,
e.event_id,
e.event_ts,
ROW_NUMBER() OVER (
PARTITION BY c.customer_id
ORDER BY e.event_ts, e.event_id
) AS rn
FROM customers c
JOIN events e
ON e.user_id = c.customer_id
AND e.event_ts >= c.signup_ts
)
SELECT customer_id, event_id, event_ts
FROM ranked
WHERE rn = 1;
9. Build a view-to-cart-to-purchase funnel with ordering.
WITH steps AS (
SELECT
user_id,
MIN(CASE WHEN event_name = 'view_product' THEN event_ts END) AS view_ts,
MIN(CASE WHEN event_name = 'add_to_cart' THEN event_ts END) AS cart_ts,
MIN(CASE WHEN event_name = 'purchase' THEN event_ts END) AS purchase_ts
FROM events
GROUP BY user_id
)
SELECT
COUNT(*) AS users,
SUM(CASE WHEN view_ts IS NOT NULL THEN 1 ELSE 0 END) AS viewed,
SUM(CASE WHEN cart_ts > view_ts THEN 1 ELSE 0 END) AS carted_after_view,
SUM(CASE WHEN purchase_ts > cart_ts THEN 1 ELSE 0 END) AS purchased_after_cart
FROM steps;
For repeated funnels, this simple first-step approach may not be enough.
10. Find purchases within 24 hours after a product view.
SELECT DISTINCT v.user_id
FROM events v
JOIN events p
ON p.user_id = v.user_id
AND p.event_name = 'purchase'
AND p.event_ts > v.event_ts
AND p.event_ts <= v.event_ts + INTERVAL '24' HOUR
WHERE v.event_name = 'view_product';
At scale, prefilter both sides and watch for many-to-many event joins.
11. Sessionize events with a 30-minute inactivity gap.
WITH ordered AS (
SELECT
user_id,
event_id,
event_ts,
LAG(event_ts) OVER (
PARTITION BY user_id
ORDER BY event_ts, event_id
) AS prev_event_ts
FROM events
),
marked AS (
SELECT
*,
CASE
WHEN prev_event_ts IS NULL THEN 1
WHEN event_ts > prev_event_ts + INTERVAL '30' MINUTE THEN 1
ELSE 0
END AS new_session
FROM ordered
)
SELECT
user_id,
SUM(new_session) OVER (
PARTITION BY user_id
ORDER BY event_ts, event_id
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS session_number,
event_id,
event_ts
FROM marked;
12. Find users with a 5-day active streak.
WITH active_days AS (
SELECT DISTINCT user_id, day_number
FROM daily_user_activity
),
numbered AS (
SELECT
user_id,
day_number,
day_number - ROW_NUMBER() OVER (
PARTITION BY user_id
ORDER BY day_number
) AS island_key
FROM active_days
),
streaks AS (
SELECT user_id, COUNT(*) AS streak_days
FROM numbered
GROUP BY user_id, island_key
)
SELECT DISTINCT user_id
FROM streaks
WHERE streak_days >= 5;
13. Find gaps longer than 14 days between orders.
WITH ordered AS (
SELECT
customer_id,
order_id,
order_ts,
LAG(order_ts) OVER (
PARTITION BY customer_id
ORDER BY order_ts, order_id
) AS prev_order_ts
FROM orders
WHERE status = 'paid'
)
SELECT customer_id, order_id, prev_order_ts, order_ts
FROM ordered
WHERE order_ts > prev_order_ts + INTERVAL '14' DAY;
14. Detect overlapping customer plan intervals.
SELECT
a.customer_id,
a.plan_id AS plan_a,
b.plan_id AS plan_b
FROM customer_plan_intervals a
JOIN customer_plan_intervals b
ON b.customer_id = a.customer_id
AND b.plan_id > a.plan_id
AND a.valid_from < b.valid_to
AND b.valid_from < a.valid_to;
For half-open intervals, overlap is a.start < b.end AND b.start < a.end.
15. Join orders to the customer plan active at order time.
SELECT o.order_id, o.customer_id, p.plan_name
FROM orders o
JOIN customer_plan_intervals p
ON p.customer_id = o.customer_id
AND o.order_ts >= p.valid_from
AND o.order_ts < p.valid_to;
This is an interval join.
16. Find customers with no activity in the 90 days after signup.
SELECT c.customer_id
FROM customers c
WHERE NOT EXISTS (
SELECT 1
FROM events e
WHERE e.user_id = c.customer_id
AND e.event_ts >= c.signup_ts
AND e.event_ts < c.signup_ts + INTERVAL '90' DAY
);
17. Compute order frequency in the first 30 days after signup.
SELECT
c.customer_id,
COUNT(o.order_id) AS orders_30d
FROM customers c
LEFT JOIN orders o
ON o.customer_id = c.customer_id
AND o.order_ts >= c.signup_ts
AND o.order_ts < c.signup_ts + INTERVAL '30' DAY
AND o.status = 'paid'
GROUP BY c.customer_id;
18. Find first purchase after first product view.
WITH first_view AS (
SELECT user_id, MIN(event_ts) AS first_view_ts
FROM events
WHERE event_name = 'view_product'
GROUP BY user_id
),
purchases_after AS (
SELECT
v.user_id,
MIN(e.event_ts) AS first_purchase_after_view_ts
FROM first_view v
JOIN events e
ON e.user_id = v.user_id
AND e.event_name = 'purchase'
AND e.event_ts > v.first_view_ts
GROUP BY v.user_id
)
SELECT *
FROM purchases_after;
19. Create a cohort table with zero retained users included.
Use a cohort spine crossed with age periods, then left join activity.
WITH cohort_sizes AS (
SELECT cohort_month, COUNT(*) AS cohort_users
FROM user_cohorts
GROUP BY cohort_month
),
cohort_age_spine AS (
SELECT c.cohort_month, a.age_month
FROM cohort_sizes c
CROSS JOIN age_spine a
),
retention AS (
SELECT cohort_month, age_month, COUNT(DISTINCT user_id) AS retained_users
FROM user_month_activity
GROUP BY cohort_month, age_month
)
SELECT
s.cohort_month,
s.age_month,
COALESCE(r.retained_users, 0) AS retained_users
FROM cohort_age_spine s
LEFT JOIN retention r
ON r.cohort_month = s.cohort_month
AND r.age_month = s.age_month;
20. Find late-arriving events where ingest date is more than three days after event date.
SELECT event_id, event_ts, ingest_ts
FROM raw_events
WHERE ingest_ts >= event_ts + INTERVAL '3' DAY;
Timestamp difference syntax can vary. Use this as a semantic sketch.
21. Compute paid revenue by event date and processing date.
SELECT
order_date,
processing_date,
SUM(total_amount) AS paid_revenue
FROM orders
WHERE status = 'paid'
GROUP BY order_date, processing_date;
This helps separate business event timing from pipeline arrival timing.
22. Find users whose first purchase happened before signup.
WITH first_purchase AS (
SELECT customer_id, MIN(order_ts) AS first_purchase_ts
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
)
SELECT c.customer_id, c.signup_ts, f.first_purchase_ts
FROM customers c
JOIN first_purchase f
ON f.customer_id = c.customer_id
WHERE f.first_purchase_ts < c.signup_ts;
This is usually a data quality or identity stitching issue.
23. Compute cancellation within 7 days of paid order.
SELECT
o.order_id,
CASE
WHEN EXISTS (
SELECT 1
FROM order_events e
WHERE e.order_id = o.order_id
AND e.event_name = 'cancelled'
AND e.event_ts > o.order_ts
AND e.event_ts <= o.order_ts + INTERVAL '7' DAY
)
THEN 1 ELSE 0
END AS cancelled_7d
FROM orders o
WHERE o.status = 'paid';
24. Find the first event of each session from a sessionized events table.
WITH ranked AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY user_id, session_number
ORDER BY event_ts, event_id
) AS rn
FROM sessionized_events
)
SELECT user_id, session_number, event_id, event_ts
FROM ranked
WHERE rn = 1;
25. Explain event time vs processing time in one sentence.
Event time is when the business action happened; processing time is when the data system observed or processed it. Business metrics usually use event time, while pipeline monitoring often uses processing time.