How to generate a date series to fill in missing dates in Google BigQuery?
You can use the query below to generate, on the fly, all dates in a given range (in this example, all dates from 2015-06-01 through CURRENT_DATE(); change those two values to control which date range is generated).
-- Legacy SQL: one row per day from 2015-06-01 through CURRENT_DATE().
SELECT
  DATE(DATE_ADD(TIMESTAMP("2015-06-01"), day_number - 1, "DAY")) AS calendar_day
FROM (
  -- Number the generated rows 1..N; row N maps to the start date + (N - 1) days.
  SELECT
    ROW_NUMBER() OVER() AS day_number,
    filler
  FROM (
    FLATTEN((
      -- Row generator: pad an empty string with '.' to a length of
      -- 1 + (days between the start date and today), split it into a
      -- repeated field, and let FLATTEN emit one row per element.
      SELECT
        SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'), '') AS filler
      FROM (SELECT NULL)
    ), filler)
  )
)
So now you can LEFT JOIN your table to this generated calendar so that every date is accounted for. See a possible example below:
-- Legacy SQL: sales per calendar day since 2015-06-01, reporting 0 for days
-- that have no matching row in the current-month sales aggregate.
SELECT
  calendar_day,
  IFNULL(sales, 0) AS sales
FROM (
  -- Calendar: one row per day from 2015-06-01 through CURRENT_DATE().
  SELECT DATE(DATE_ADD(TIMESTAMP("2015-06-01"), pos - 1, "DAY")) AS calendar_day
  FROM (
    SELECT ROW_NUMBER() OVER() AS pos, *
    FROM (FLATTEN((
      SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'),'') AS h
      FROM (SELECT NULL)),h
    )))
) AS all_dates
LEFT JOIN (
  -- The join key must be the full date, built with the same DATE() function
  -- as calendar_day. DAY() would yield only the day-of-month number, which
  -- can never equal a full date, so every calendar day would report 0 sales.
  SELECT DATE(InvoiceDate) AS sale_date, SUM(InvoiceAmount) AS sales
  FROM test_gmail_com.sales
  WHERE YEAR(InvoiceDate) = YEAR(CURRENT_DATE()) AND
    MONTH(InvoiceDate) = MONTH(CURRENT_DATE())
  GROUP BY sale_date
) AS month_sales
ON month_sales.sale_date = all_dates.calendar_day
I need to get the previous month's sales.
The query below returns all days of the previous month:
-- Legacy SQL: one row per day of the previous calendar month.
-- The first day of the previous month is computed as
--   (first day of the current month) - 1 MONTH.
-- Stepping back to day 1 BEFORE subtracting the month keeps the result
-- independent of how month arithmetic treats days 29-31 (for example when
-- run on March 31, where "one month earlier" does not exist as a date).
SELECT DATE(DATE_ADD(DATE_ADD(DATE_ADD(CURRENT_DATE(), 1 - DAY(CURRENT_DATE()), "DAY"), -1, "MONTH"), pos - 1, "DAY")) AS calendar_day
FROM (
  SELECT ROW_NUMBER() OVER() AS pos, *
  FROM (FLATTEN((
    -- Padded length = 1 + DATEDIFF(last day of previous month, first day of previous month).
    -- Last day of the previous month = today - DAY(today) days.
    SELECT SPLIT(RPAD('', 1 + DATEDIFF(DATE_ADD(CURRENT_DATE(), - DAY(CURRENT_DATE()), "DAY"), DATE_ADD(DATE_ADD(CURRENT_DATE(), 1 - DAY(CURRENT_DATE()), "DAY"), -1, "MONTH")), '.'),'') AS h
    FROM (SELECT NULL)),h
  )))
How to fill missing dates in BigQuery?
this should work
-- Standard SQL: for every name, emit one row per day in a fixed date range
-- and fill each day's value from the first non-NULL value at or after that
-- day (falling back to 1 when none follows).
WITH base AS (
  -- Inline sample data; dates are MM/DD/YYYY strings.
  SELECT 'A' AS name, '01/01/2020' AS date, 1.5 AS val UNION ALL
  SELECT 'A' AS name, '01/03/2020' AS date, 2 AS val UNION ALL
  SELECT 'A' AS name, '01/06/2020' AS date, 5 AS val UNION ALL
  SELECT 'B' AS name, '01/02/2020' AS date, 90 AS val UNION ALL
  SELECT 'B' AS name, '01/07/2020' AS date, 10 AS val
),
missing_dates AS (
  -- Every (name, day) combination across the generated range.
  SELECT
    names.name,
    calendar_date AS date
  FROM UNNEST(GENERATE_DATE_ARRAY('2019-12-29', '2020-01-09', INTERVAL 1 DAY)) AS calendar_date
  CROSS JOIN (SELECT DISTINCT name FROM base) AS names
),
joined AS (
  -- Attach the known value where the parsed sample date matches the day.
  SELECT DISTINCT
    missing_dates.name,
    missing_dates.date,
    base.val
  FROM missing_dates
  LEFT JOIN base
    ON base.name = missing_dates.name
    AND PARSE_DATE('%m/%d/%Y', base.date) = missing_dates.date
)
SELECT
  * EXCEPT (val),
  -- The output column is named "va1" (digit one), as in the original query.
  IFNULL(
    FIRST_VALUE(val IGNORE NULLS) OVER (
      PARTITION BY name
      ORDER BY date
      ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ),
    1
  ) AS va1
FROM joined
BigQuery - is there a way to find missing dates in a data sequence?
Consider below query for missing dates:
-- Sample data: the dates present for one dataset.
CREATE TEMP TABLE sample AS
SELECT '123456' AS dataset_id, DATE '2022-04-11' AS date
UNION ALL
SELECT '123456' AS dataset_id, DATE '2022-04-12' AS date
UNION ALL
SELECT '123456' AS dataset_id, DATE '2022-06-01' AS date
UNION ALL
SELECT '123456' AS dataset_id, DATE '2022-06-02' AS date;

-- Missing dates = every date in the expected range minus the dates present.
SELECT expected_date AS date
FROM UNNEST(GENERATE_DATE_ARRAY('2022-04-12', '2022-06-01')) AS expected_date
EXCEPT DISTINCT
SELECT date
FROM sample;
- GENERATE_DATE_ARRAY
- EXCEPT DISTINCT
Multiple Dataset Example
-- Missing dates per dataset: pair every distinct dataset_id with every date
-- in the expected range, then remove the (dataset_id, date) pairs present.
SELECT
  datasets.dataset_id,
  expected_date AS date
FROM (
  SELECT DISTINCT dataset_id
  FROM sample
) AS datasets
CROSS JOIN UNNEST(GENERATE_DATE_ARRAY('2022-04-12', '2022-06-01')) AS expected_date
EXCEPT DISTINCT
SELECT
  dataset_id,
  date
FROM sample;
Populating a table with all dates in a given range in Google BigQuery
all dates from 2015-06-01 till CURRENT_DATE()
-- Legacy SQL: one row per day from 2015-06-01 through CURRENT_DATE(),
-- returned in a column named DAY.
SELECT
  DATE(DATE_ADD(TIMESTAMP("2015-06-01"), day_number - 1, "DAY")) AS DAY
FROM (
  -- Sequential row number; row N corresponds to the start date + (N - 1) days.
  SELECT
    ROW_NUMBER() OVER() AS day_number,
    filler
  FROM (
    FLATTEN((
      -- Row generator: a '.'-padded string of length
      -- 1 + (days between the start date and today), split into a repeated
      -- field that FLATTEN expands to one row per element.
      SELECT
        SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'), '') AS filler
      FROM (SELECT NULL)
    ), filler)
  )
)
all weeks between the two dates
-- Legacy SQL: one row per (year, week) pair that occurs between 2015-06-01
-- and CURRENT_DATE(), derived from a generated list of days.
SELECT
  YEAR(calendar_day) AS y,
  WEEK(calendar_day) AS w
FROM (
  -- One row per day in the range.
  SELECT
    DATE(DATE_ADD(TIMESTAMP("2015-06-01"), day_number - 1, "DAY")) AS calendar_day
  FROM (
    SELECT
      ROW_NUMBER() OVER() AS day_number,
      filler
    FROM (
      FLATTEN((
        -- Row generator sized by 1 + (days between the start date and today).
        SELECT
          SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP("2015-06-01")), '.'), '') AS filler
        FROM (SELECT NULL)
      ), filler)
    )
  )
)
GROUP BY y, w
Aggregate by date and group and fill in missing dates in big query
Below example is for BigQuery Standard SQL
#standardSQL
-- For each id, list every month between its start_date and end_date and sum
-- the amounts recorded for that month, reporting 0 when there are none.
WITH `project.dataset.data` AS (
  -- Sample ranges; dates are MM/DD/YYYY strings.
  SELECT 1 AS id, '01/01/2014' AS start_date, '06/01/2014' AS end_date
  UNION ALL
  SELECT 2, '10/01/2005', '12/01/2015'
  UNION ALL
  SELECT 3, '08/01/2009', '10/01/2012'
),
`project.dataset.amounts` AS (
  -- Sample amounts keyed by id and month (same string format).
  SELECT 1 AS id, '02/01/2014' AS month_year, 100 AS amount
  UNION ALL
  SELECT 1, '03/01/2007', 25
  UNION ALL
  SELECT 2, '10/01/2010', 50
),
all_months AS (
  -- Expand each id's range into one row per month, formatted back to the
  -- MM/DD/YYYY string form so it can be joined to the amounts by equality.
  SELECT
    ranges.id,
    FORMAT_DATE('%m/%d/%Y', month_start) AS month_year
  FROM `project.dataset.data` AS ranges
  CROSS JOIN UNNEST(
    GENERATE_DATE_ARRAY(
      PARSE_DATE('%m/%d/%Y', ranges.start_date),
      PARSE_DATE('%m/%d/%Y', ranges.end_date),
      INTERVAL 1 MONTH
    )
  ) AS month_start
)
SELECT
  id,
  month_year,
  SUM(IFNULL(amount, 0)) AS amount
FROM all_months AS months
LEFT JOIN `project.dataset.amounts` AS amounts
  USING (id, month_year)
GROUP BY
  id,
  month_year
Cannot generate date array with adding missing dates and fill with previously known value
Consider below approach
-- Standard SQL: for every listing and every day since 2020-01-01, return the
-- most recent non-NULL price logged on or before that day.
SELECT
  calendar_day AS datetime_from,
  listings.listing_id,
  LAST_VALUE(log_rows.price_cents_usd IGNORE NULLS) OVER (
    PARTITION BY listings.listing_id
    ORDER BY calendar_day
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS price_cents_usd
FROM (
  SELECT DISTINCT listing_id
  FROM `data_marts.pg_listings_log`
) AS listings
CROSS JOIN UNNEST(GENERATE_DATE_ARRAY('2020-01-01', CURRENT_DATE())) AS calendar_day
-- Days without a log row keep NULL here and are filled by LAST_VALUE above.
LEFT JOIN `data_marts.pg_listings_log` AS log_rows
  ON DATE(log_rows.datetime_from) = calendar_day
  AND log_rows.listing_id = listings.listing_id
-- ORDER BY datetime_from
How to generate date series in BigQuery and fill zeros for missing sales data
You have the left join between the tables days and sales set up correctly, but your WHERE clause filters rows based on the date in the sales table (B.date_local). That date will be NULL whenever there are no sales transactions for a day, so those days are filtered out.
Change your WHERE clause to filter on the day field from the days table instead. This will at least bring the days with zero sales into your result. The changed WHERE clause would look like this:
...
WHERE DATE(A.day) BETWEEN "2021-04-01" AND CURRENT_DATE() ...
Filling missing dates in BigQuery (SQL) without creating a new calendar
Below is for BigQuery Standard SQL and built off of your current result
#standardSQL
-- Template: expand your_current_result to one row per day between its
-- earliest and latest ProgressDate, carrying the last known EstMin / EstMax
-- forward over days that have no row of their own.
WITH your_current_result AS (
......
),
days AS (
  -- Every calendar day between the earliest and latest ProgressDate.
  SELECT day
  FROM (
    SELECT
      MIN(DATE(TIMESTAMP(ProgressDate))) AS min_dt,
      MAX(DATE(TIMESTAMP(ProgressDate))) AS max_dt
    FROM your_current_result
  ) AS bounds
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(bounds.min_dt, bounds.max_dt)) AS day
)
SELECT
  day,
  LAST_VALUE(EstMin IGNORE NULLS) OVER up_to_day AS EstMin,
  LAST_VALUE(EstMax IGNORE NULLS) OVER up_to_day AS EstMax
FROM days
LEFT JOIN your_current_result
  ON DATE(TIMESTAMP(ProgressDate)) = day
WINDOW up_to_day AS (ORDER BY day)
-- ORDER BY day
You can test and play with the above using the example output from your question:
#standardSQL
-- Runnable example: expand the sample rows to one row per day between the
-- earliest and latest ProgressDate, carrying the last known EstMin / EstMax
-- forward over days that have no row of their own.
WITH your_current_result AS (
  SELECT '2017-07-21T00:00:00Z' AS ProgressDate, 0.125 AS EstMin, 0.25 AS EstMax
  UNION ALL SELECT '2017-07-24T00:00:00Z', 5.125, 5.375
  UNION ALL SELECT '2017-07-25T00:00:00Z', 8.75, 10.25
  UNION ALL SELECT '2017-07-26T00:00:00Z', 10.0, 12.0
  UNION ALL SELECT '2017-07-27T00:00:00Z', 10.5, 12.75
  UNION ALL SELECT '2017-08-01T00:00:00Z', 15.25, 19.125
  UNION ALL SELECT '2017-08-02T00:00:00Z', 15.5, 19.375
  UNION ALL SELECT '2017-08-05T00:00:00Z', 16.25, 20.625
),
days AS (
  -- Every calendar day between the earliest and latest ProgressDate.
  SELECT day
  FROM (
    SELECT
      MIN(DATE(TIMESTAMP(ProgressDate))) AS min_dt,
      MAX(DATE(TIMESTAMP(ProgressDate))) AS max_dt
    FROM your_current_result
  ) AS bounds
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(bounds.min_dt, bounds.max_dt)) AS day
)
SELECT
  day,
  LAST_VALUE(EstMin IGNORE NULLS) OVER up_to_day AS EstMin,
  LAST_VALUE(EstMax IGNORE NULLS) OVER up_to_day AS EstMax
FROM days
LEFT JOIN your_current_result
  ON DATE(TIMESTAMP(ProgressDate)) = day
WINDOW up_to_day AS (ORDER BY day)
ORDER BY day
with result
Row day EstMin EstMax
1 2017-07-21 0.125 0.25
2 2017-07-22 0.125 0.25
3 2017-07-23 0.125 0.25
4 2017-07-24 5.125 5.375
5 2017-07-25 8.75 10.25
6 2017-07-26 10.0 12.0
7 2017-07-27 10.5 12.75
8 2017-07-28 10.5 12.75
9 2017-07-29 10.5 12.75
10 2017-07-30 10.5 12.75
11 2017-07-31 10.5 12.75
12 2017-08-01 15.25 19.125
13 2017-08-02 15.5 19.375
14 2017-08-03 15.5 19.375
15 2017-08-04 15.5 19.375
16 2017-08-05 16.25 20.625
Related Topics
Writing a Subquery Using Zend Db
Sql: Aggregating Strings Together
How to Exclude a Column from Select Query
Truncate Timestamp to Arbitrary Intervals
Dbcc Checkident Sets Identity to 0
Is a Primary Key Necessary in SQL Server
Why Is Running a Query on SQL Azure So Much Slower
Subtract Hours from the Now() Function
Normalizing Accented Characters in MySQL Queries
Memory Effective Way to Read Blob Data in C#/SQL 2005
Why No Many-To-Many Relationships