How to Generate Date Series to Occupy Absent Dates in Google BigQuery

How to generate a date series to fill in absent dates in Google BigQuery?

You can use the query below (BigQuery Legacy SQL) to generate, on the fly, all dates in a given range. In the example below it is all dates from 2015-06-01 through CURRENT_DATE(); by changing those two values you control which date range is generated.

-- Generates one row per calendar day from 2015-06-01 through CURRENT_DATE().
-- Uses legacy-style BigQuery constructs (FLATTEN, three-argument DATE_ADD, DATEDIFF).
SELECT DATE(DATE_ADD(TIMESTAMP("2015-06-01"), pos - 1, "DAY")) AS calendar_day
FROM (
-- pos numbers the flattened rows starting at 1, so pos - 1 is the day offset
-- from the start date.
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
-- RPAD builds a filler string of '.' whose length is the day span plus one;
-- SPLIT with an empty delimiter presumably yields one element per character
-- (TODO confirm), and FLATTEN on h then expands those elements into rows.
SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'),'') AS h
FROM (SELECT NULL)),h
)))

Now you can LEFT JOIN this generated calendar with your table so that all dates are accounted for. See a potential example below.

-- Daily sales LEFT JOINed onto a generated calendar so that days without any
-- invoice still appear, with sales = 0 (BigQuery Legacy SQL).
-- NOTE: the calendar runs from 2015-06-01 through today, while the sales
-- subquery only covers the current month; narrow the calendar start date if
-- only the current month is wanted in the output.
SELECT
all_dates.calendar_day AS calendar_day,
IFNULL(monthly_sales.sales, 0) AS sales
FROM (
SELECT DATE(DATE_ADD(TIMESTAMP("2015-06-01"), pos - 1, "DAY")) AS calendar_day
FROM (
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'),'') AS h
FROM (SELECT NULL)),h
)))
) AS all_dates
LEFT JOIN (
-- Key on the full calendar date: DATE() produces the same kind of value as
-- calendar_day above, whereas DAY() would produce only the day-of-month
-- number and could never match calendar_day.
SELECT DATE(InvoiceDate) AS sale_day, SUM(InvoiceAmount) AS sales
FROM test_gmail_com.sales
WHERE YEAR(InvoiceDate) = YEAR(CURRENT_DATE()) AND
MONTH(InvoiceDate) = MONTH(CURRENT_DATE())
GROUP BY sale_day
) AS monthly_sales
ON all_dates.calendar_day = monthly_sales.sale_day

I need to get the previous month's sales

The query below returns all days of the previous month

-- Generates one row per day of the previous calendar month (BigQuery Legacy SQL).
-- The month start is computed by first moving to day 1 of the current month and
-- only then stepping back one month. Stepping back a month first (e.g. from
-- March 31) would land on a day-of-month that February does not have, making
-- the later day subtraction depend on how DATE_ADD resolves that date.
SELECT DATE(DATE_ADD(DATE_ADD(DATE_ADD(CURRENT_DATE(), 1 - DAY(CURRENT_DATE()), "DAY"), -1, "MONTH"), pos - 1, "DAY")) AS calendar_day
FROM (
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
-- Filler string length = 1 + (last day of previous month - first day of previous month).
-- Last day of previous month = today minus today's day-of-month.
SELECT SPLIT(RPAD('', 1 + DATEDIFF(DATE_ADD(CURRENT_DATE(), - DAY(CURRENT_DATE()), "DAY"), DATE_ADD(DATE_ADD(CURRENT_DATE(), 1 - DAY(CURRENT_DATE()), "DAY"), -1, "MONTH")), '.'),'') AS h
FROM (SELECT NULL)),h
)))

How to fill missing dates in BigQuery?

this should work

-- For every name, emit one row per day in 2019-12-29..2020-01-09 and fill each
-- day with the next known value on or after that day; days with no later value
-- fall back to 1.
WITH base AS (
  -- Sample input; dates are MM/DD/YYYY strings.
  SELECT 'A' AS name, '01/01/2020' AS date, 1.5 AS val
  UNION ALL SELECT 'A' AS name, '01/03/2020' AS date, 2 AS val
  UNION ALL SELECT 'A' AS name, '01/06/2020' AS date, 5 AS val
  UNION ALL SELECT 'B' AS name, '01/02/2020' AS date, 90 AS val
  UNION ALL SELECT 'B' AS name, '01/07/2020' AS date, 10 AS val
),
calendar AS (
  -- Every (name, day) combination in the fixed window.
  SELECT
    name,
    calendar_date AS date
  FROM UNNEST(GENERATE_DATE_ARRAY('2019-12-29', '2020-01-09', INTERVAL 1 DAY)) AS calendar_date
  CROSS JOIN (SELECT DISTINCT name FROM base)
),
matched AS (
  -- Attach the known value (if any) to each calendar row.
  SELECT DISTINCT
    calendar.name,
    calendar.date,
    val
  FROM calendar
  LEFT JOIN base
    ON calendar.name = base.name
    AND PARSE_DATE('%m/%d/%Y', base.date) = calendar.date
)
SELECT
  * EXCEPT (val),
  IFNULL(
    FIRST_VALUE(val IGNORE NULLS) OVER (
      PARTITION BY name
      ORDER BY date
      ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ),
    1
  ) AS va1
FROM matched

BigQuery - is there a way to find missing dates in a data sequence?

Consider below query for missing dates:

-- Sample data: one dataset with two pairs of consecutive dates and a gap between them.
CREATE TEMP TABLE sample AS
SELECT '123456' AS dataset_id, DATE '2022-04-11' AS date
UNION ALL SELECT '123456' AS dataset_id, DATE '2022-04-12' AS date
UNION ALL SELECT '123456' AS dataset_id, DATE '2022-06-01' AS date
UNION ALL SELECT '123456' AS dataset_id, DATE '2022-06-02' AS date;

-- Dates between 2022-04-12 and 2022-06-01 (inclusive) that have no row in sample.
SELECT expected_date AS date
FROM UNNEST(GENERATE_DATE_ARRAY('2022-04-12', '2022-06-01')) AS expected_date
EXCEPT DISTINCT
SELECT date
FROM sample;
  • GENERATE_DATE_ARRAY
  • EXCEPT DISTINCT
Multiple Dataset Example

-- Missing dates per dataset: pair every distinct dataset_id with every date in
-- the window, then remove the (dataset_id, date) pairs that exist in sample.
SELECT
  ids.dataset_id,
  expected_date AS date
FROM (SELECT DISTINCT dataset_id FROM sample) AS ids
CROSS JOIN UNNEST(GENERATE_DATE_ARRAY('2022-04-12', '2022-06-01')) AS expected_date
EXCEPT DISTINCT
SELECT
  dataset_id,
  date
FROM sample;

Populating a table with all dates in a given range in Google BigQuery

all dates from 2015-06-01 till CURRENT_DATE()

-- One row per calendar day from 2015-06-01 through CURRENT_DATE(), exposed as DAY.
-- Uses legacy-style BigQuery constructs (FLATTEN, three-argument DATE_ADD, DATEDIFF).
SELECT DATE(DATE_ADD(TIMESTAMP("2015-06-01"), pos - 1, "DAY")) AS DAY
FROM (
-- pos numbers the flattened rows starting at 1; pos - 1 is the day offset.
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
-- Filler string of '.' with length = day span + 1; SPLIT on the empty delimiter
-- presumably produces one element per character (TODO confirm).
SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'),'') AS h
FROM (SELECT NULL)),h
)))

all weeks between the two dates

-- Distinct (year, week) pairs covered by the days from 2015-06-01 through
-- CURRENT_DATE(): the inner query generates the days, the outer GROUP BY
-- collapses them to one row per year/week combination.
SELECT YEAR(DAY) AS y, WEEK(DAY) AS w
FROM (
-- Day generator: start date plus (pos - 1) days for each generated row.
SELECT DATE(DATE_ADD(TIMESTAMP("2015-06-01"), pos - 1, "DAY")) AS DAY
FROM (
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
-- Filler string of '.' with length = day span + 1, split into elements and
-- flattened into rows.
SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'),'') AS h
FROM (SELECT NULL)),h
)))
)
GROUP BY y, w

Aggregate by date and group and fill in missing dates in big query

Below example is for BigQuery Standard SQL

#standardSQL
-- For each id, emit one row per month between its start_date and end_date and
-- sum the amounts recorded for that month (0 when none exist).
WITH `project.dataset.data` AS (
  -- Inline stand-in for the real table: per-id date ranges as MM/DD/YYYY strings.
  SELECT 1 AS id, '01/01/2014' AS start_date, '06/01/2014' AS end_date
  UNION ALL SELECT 2, '10/01/2005', '12/01/2015'
  UNION ALL SELECT 3, '08/01/2009', '10/01/2012'
),
`project.dataset.amounts` AS (
  -- Inline stand-in for the real table: per-id monthly amounts.
  SELECT 1 AS id, '02/01/2014' AS month_year, 100 AS amount
  UNION ALL SELECT 1, '03/01/2007', 25
  UNION ALL SELECT 2, '10/01/2010', 50
),
all_months AS (
  -- Expand each id's range into month steps, formatted back to MM/DD/YYYY so
  -- the result can be joined on the string key used by the amounts table.
  SELECT
    ranges.id,
    FORMAT_DATE('%m/%d/%Y', month_start) AS month_year
  FROM `project.dataset.data` AS ranges
  CROSS JOIN UNNEST(
    GENERATE_DATE_ARRAY(
      PARSE_DATE('%m/%d/%Y', ranges.start_date),
      PARSE_DATE('%m/%d/%Y', ranges.end_date),
      INTERVAL 1 MONTH
    )
  ) AS month_start
)
SELECT
  id,
  month_year,
  SUM(IFNULL(a.amount, 0)) AS amount
FROM all_months AS m
LEFT JOIN `project.dataset.amounts` AS a
  USING (id, month_year)
GROUP BY
  id,
  month_year

Cannot generate date array with adding missing dates and fill with previously known value

Consider below approach

-- One row per listing per day from 2020-01-01 through today, carrying the most
-- recent non-null price forward onto days that have no log entry.
SELECT
  calendar_day AS datetime_from,
  listings.listing_id,
  LAST_VALUE(price_log.price_cents_usd IGNORE NULLS) OVER (
    PARTITION BY listings.listing_id
    ORDER BY calendar_day
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS price_cents_usd
FROM (SELECT DISTINCT listing_id FROM `data_marts.pg_listings_log`) AS listings
CROSS JOIN UNNEST(GENERATE_DATE_ARRAY('2020-01-01', CURRENT_DATE)) AS calendar_day
LEFT JOIN `data_marts.pg_listings_log` AS price_log
  ON DATE(price_log.datetime_from) = calendar_day
  AND listings.listing_id = price_log.listing_id
-- Append "ORDER BY datetime_from" if ordered output is needed.

How to generate date series in BigQuery and fill zeros for missing sales data

You have the LEFT JOIN between the days and sales tables set up correctly, but your WHERE clause filters rows based only on the date in the sales table (B.date_local). This date will be NULL if there are no sales transactions for that day.

Change your WHERE clause to filter on the day field from the days table instead. This will at least bring the days with zero sales into your result. The changed WHERE clause would look like this:

...
WHERE DATE(A.day) BETWEEN "2021-04-01" AND CURRENT_DATE() ...

Filling missing dates in BigQuery (SQL) without creating a new calendar

Below is for BigQuery Standard SQL and built off of your current result

#standardSQL
-- Template: fills in the days missing from your_current_result and carries the
-- latest known EstMin / EstMax forward onto those days.
WITH your_current_result AS (
-- Placeholder: substitute the query that produces ProgressDate, EstMin, EstMax.
......
), days AS (
-- One row per day between the earliest and latest ProgressDate.
SELECT day
FROM (
SELECT
MIN(DATE(TIMESTAMP(ProgressDate))) min_dt,
MAX(DATE(TIMESTAMP(ProgressDate))) max_dt
FROM your_current_result
), UNNEST(GENERATE_DATE_ARRAY(min_dt, max_dt)) day
)
SELECT day,
-- LAST_VALUE ... IGNORE NULLS ordered by day picks the latest non-null value
-- at or before each day, so gap days repeat the preceding estimate.
LAST_VALUE(EstMin IGNORE NULLS) OVER(ORDER BY day) EstMin,
LAST_VALUE(EstMax IGNORE NULLS) OVER(ORDER BY day) EstMax
FROM days
LEFT JOIN your_current_result
ON day = DATE(TIMESTAMP(ProgressDate))
-- Optional: uncomment to return rows in date order.
-- ORDER BY day

You can test and play with the above using the example output from your question:

#standardSQL
-- Worked example: fill the days missing between the first and last
-- ProgressDate and repeat the latest known estimates on those days.
WITH your_current_result AS (
  -- Sample rows; ProgressDate is an ISO-8601 timestamp string.
  SELECT '2017-07-21T00:00:00Z' AS ProgressDate, 0.125 AS EstMin, 0.25 AS EstMax
  UNION ALL SELECT '2017-07-24T00:00:00Z', 5.125, 5.375
  UNION ALL SELECT '2017-07-25T00:00:00Z', 8.75, 10.25
  UNION ALL SELECT '2017-07-26T00:00:00Z', 10.0, 12.0
  UNION ALL SELECT '2017-07-27T00:00:00Z', 10.5, 12.75
  UNION ALL SELECT '2017-08-01T00:00:00Z', 15.25, 19.125
  UNION ALL SELECT '2017-08-02T00:00:00Z', 15.5, 19.375
  UNION ALL SELECT '2017-08-05T00:00:00Z', 16.25, 20.625
),
days AS (
  -- One row per day between the earliest and latest ProgressDate.
  SELECT day
  FROM (
    SELECT
      MIN(DATE(TIMESTAMP(ProgressDate))) AS min_dt,
      MAX(DATE(TIMESTAMP(ProgressDate))) AS max_dt
    FROM your_current_result
  )
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(min_dt, max_dt)) AS day
)
SELECT
  day,
  -- Latest non-null estimate at or before each day.
  LAST_VALUE(EstMin IGNORE NULLS) OVER (ORDER BY day) AS EstMin,
  LAST_VALUE(EstMax IGNORE NULLS) OVER (ORDER BY day) AS EstMax
FROM days
LEFT JOIN your_current_result
  ON day = DATE(TIMESTAMP(ProgressDate))
ORDER BY day

with result

Row day         EstMin  EstMax   
1 2017-07-21 0.125 0.25
2 2017-07-22 0.125 0.25
3 2017-07-23 0.125 0.25
4 2017-07-24 5.125 5.375
5 2017-07-25 8.75 10.25
6 2017-07-26 10.0 12.0
7 2017-07-27 10.5 12.75
8 2017-07-28 10.5 12.75
9 2017-07-29 10.5 12.75
10 2017-07-30 10.5 12.75
11 2017-07-31 10.5 12.75
12 2017-08-01 15.25 19.125
13 2017-08-02 15.5 19.375
14 2017-08-03 15.5 19.375
15 2017-08-04 15.5 19.375
16 2017-08-05 16.25 20.625


Related Topics



Leave a reply



Submit