Duplicate Groups of Records to Fill Multiple Date Gaps in Google BigQuery

Duplicate groups of records to fill multiple date gaps in Google BigQuery

Try below


#standardSQL
-- Repeats each (product, partner) observation on every calendar day from its
-- own date up to, but not including, that pair's next observation. The last
-- observation of a pair is carried through to the end of the calendar.
WITH history AS (
  -- Sample input. d is an ISO-8601 date held as a STRING.
  SELECT '2017-01-01' AS d, 'a' AS product, 'x' AS partner, 10 AS value UNION ALL
  SELECT '2017-01-01' AS d, 'b' AS product, 'x' AS partner, 15 AS value UNION ALL
  SELECT '2017-01-01' AS d, 'a' AS product, 'y' AS partner, 11 AS value UNION ALL
  SELECT '2017-01-01' AS d, 'b' AS product, 'y' AS partner, 16 AS value UNION ALL
  SELECT '2017-01-05' AS d, 'b' AS product, 'x' AS partner, 13 AS value UNION ALL
  SELECT '2017-01-07' AS d, 'a' AS product, 'y' AS partner, 15 AS value UNION ALL
  SELECT '2017-01-07' AS d, 'a' AS product, 'x' AS partner, 15 AS value
),
daterange AS (
  -- One row per calendar day to report on.
  SELECT date_in_range
  FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2017-01-01', DATE '2017-01-10')) AS date_in_range
),
history_typed AS (
  -- Parse the text date once so that every later comparison and ordering
  -- works on a real DATE instead of relying on string collation.
  SELECT
    PARSE_DATE('%Y-%m-%d', d) AS valid_from,
    product,
    partner,
    value
  FROM history
),
temp AS (
  -- valid_until is the pair's next observation date (NULL for the latest one).
  -- No ORDER BY here: row order inside a CTE does not affect the final result;
  -- only the outermost ORDER BY does.
  SELECT
    valid_from,
    product,
    partner,
    value,
    LEAD(valid_from) OVER (PARTITION BY product, partner ORDER BY valid_from) AS valid_until
  FROM history_typed
)
SELECT
  daterange.date_in_range,
  temp.product,
  temp.partner,
  temp.value
FROM daterange
INNER JOIN temp
  -- Half-open interval [valid_from, valid_until); open-ended when there is no
  -- later observation for the pair.
  ON daterange.date_in_range >= temp.valid_from
  AND (temp.valid_until IS NULL OR daterange.date_in_range < temp.valid_until)
ORDER BY product, partner, date_in_range

BigQuery/SQL: Filling in gaps as duplicate rows between years

Below is for BigQuery Standard SQL

#standardSQL
-- Produces one row per (year, product, partner). A year with no observation
-- takes the closest earlier value for the pair, or, when none exists, the
-- closest later one.
WITH history AS (
  -- Sample input; d is the year of the observation.
  SELECT 2012 AS d, 'a' AS product, 'x' AS partner, 10 AS value UNION ALL
  SELECT 2010 AS d, 'b' AS product, 'x' AS partner, 15 AS value UNION ALL
  SELECT 2014 AS d, 'a' AS product, 'y' AS partner, 11 AS value UNION ALL
  SELECT 2012 AS d, 'b' AS product, 'y' AS partner, 16 AS value UNION ALL
  SELECT 2015 AS d, 'b' AS product, 'x' AS partner, 13 AS value UNION ALL
  SELECT 2017 AS d, 'a' AS product, 'y' AS partner, 15 AS value UNION ALL
  SELECT 2017 AS d, 'a' AS product, 'x' AS partner, 15 AS value
),
daterange AS (
  -- Years from 2010 up to the current date, stepping one year at a time.
  SELECT EXTRACT(YEAR FROM fiscal_year_start) AS date_in_range
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE '2010-01-01', CURRENT_DATE(), INTERVAL 1 YEAR)
  ) AS fiscal_year_start
),
product_partner AS (
  -- Every product/partner pair that occurs in the history.
  SELECT DISTINCT product, partner
  FROM history
),
history_ext AS (
  -- Full year x pair grid; value stays NULL where the history has no row.
  SELECT
    dr.date_in_range,
    pp.product,
    pp.partner,
    h.value
  FROM daterange AS dr
  CROSS JOIN product_partner AS pp
  LEFT JOIN history AS h
    ON h.d = dr.date_in_range
    AND h.product = pp.product
    AND h.partner = pp.partner
)
SELECT
  date_in_range,
  product,
  partner,
  COALESCE(
    value,
    -- closest non-NULL value strictly before this year
    LAST_VALUE(value IGNORE NULLS) OVER (by_pair ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
    -- otherwise the closest non-NULL value strictly after this year
    FIRST_VALUE(value IGNORE NULLS) OVER (by_pair ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
  ) AS value
FROM history_ext
WINDOW by_pair AS (PARTITION BY product, partner ORDER BY date_in_range)
ORDER BY product, partner, date_in_range

and returns

Row date_in_range   product partner value    
1 2010 a x 10
2 2011 a x 10
3 2012 a x 10
4 2013 a x 10
5 2014 a x 10
6 2015 a x 10
7 2016 a x 10
8 2017 a x 15
9 2018 a x 15
10 2019 a x 15
11 2010 a y 11
12 2011 a y 11
13 2012 a y 11
14 2013 a y 11
15 2014 a y 11
16 2015 a y 11
17 2016 a y 11
18 2017 a y 15
19 2018 a y 15
20 2019 a y 15
21 2010 b x 15
22 2011 b x 15
23 2012 b x 15
24 2013 b x 15
25 2014 b x 15
26 2015 b x 13
27 2016 b x 13
28 2017 b x 13
29 2018 b x 13
30 2019 b x 13
31 2010 b y 16
32 2011 b y 16
33 2012 b y 16
34 2013 b y 16
35 2014 b y 16
36 2015 b y 16
37 2016 b y 16
38 2017 b y 16
39 2018 b y 16
40 2019 b y 16

Duplicating records to fill gap between dates in Google BigQuery

How can I build a query within Google BigQuery that yields an output like the one below? A value at a given date is repeated until the next change for the dates in between

See example below

-- Left-joins a generated sequence of days to the sparse rows of
-- TABLE_WITH_GAPS and repeats the SKU / STORE / STOCK_ON_HAND of a row on the
-- following days that have no row of their own.
-- NOTE(review): FLATTEN, the three-argument DATE_ADD, DATEDIFF and the
-- comma-separated subqueries in FROM look like BigQuery legacy SQL; confirm the
-- dialect before running this anywhere else.
SELECT
MODIFY_DATE,
-- Rows that share a grp contain at most one row with a non-NULL SKU (see the
-- running COUNT below), so MAX picks that row's values for the whole group.
MAX(SKU_TEMP) OVER(PARTITION BY grp) AS SKU,
MAX(STORE_TEMP) OVER(PARTITION BY grp) AS STORE,
MAX(STOCK_ON_HAND_TEMP) OVER(PARTITION BY grp) AS STOCK_ON_HAND,
FROM (
SELECT
DAY AS MODIFY_DATE, SKU AS SKU_TEMP, STORE AS STORE_TEMP, STOCK_ON_HAND AS STOCK_ON_HAND_TEMP,
-- Running count of non-NULL SKU values in day order: it increases only on a
-- day that has a joined row, so gap days keep the number of the latest earlier
-- row. The window has no PARTITION BY, so the count runs over all rows together.
COUNT(SKU) OVER(ORDER BY DAY ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS grp,
FROM (
-- Day generator: pos is a 1-based row number, so pos - 1 is the day offset
-- added to 2016-08-01.
SELECT DATE(DATE_ADD(TIMESTAMP("2016-08-01"), pos - 1, "DAY")) AS DAY
FROM (
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
-- RPAD builds a string of '.' whose length is 1 + the day difference between
-- the two timestamps.
-- NOTE(review): presumably SPLIT(..., '') plus FLATTEN turns it into one row
-- per character, i.e. one row per day in the range - confirm.
SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP("2016-08-07"), TIMESTAMP("2016-08-01")), '.'),'') AS h
FROM (SELECT NULL)),h
)))
) AS DATES
LEFT JOIN (
SELECT DATE(MODIFY_DATE) AS MODIFY_DATE, SKU, STORE, STOCK_ON_HAND
FROM
-- Inline sample rows with gaps between the dates.
-- NOTE(review): assumes the commas between these subqueries combine them as a
-- union (legacy SQL behaviour) rather than a cross join - confirm.
(SELECT "2016-08-01" AS MODIFY_DATE, "1120010" AS SKU, 21 AS STORE, 75 AS STOCK_ON_HAND),
(SELECT "2016-08-05" AS MODIFY_DATE, "1120010" AS SKU, 22 AS STORE, 100 AS STOCK_ON_HAND),
(SELECT "2016-08-07" AS MODIFY_DATE, "1120011" AS SKU, 23 AS STORE, 40 AS STOCK_ON_HAND),
) AS TABLE_WITH_GAPS
ON TABLE_WITH_GAPS.MODIFY_DATE = DATES.DAY
)
ORDER BY MODIFY_DATE

How to duplicate rows generating dates between Start Date and End Date in BigQuery?

Below is for BigQuery Standard SQL

#standardSQL
-- Emits one row per calendar day between start_date and end_date (both
-- inclusive) for every input row, keeping the original columns alongside.
WITH `project.dataset.table` AS (
  -- Inline sample row standing in for the real table.
  SELECT
    'A' AS user_name,
    DATE '2019-07-01' AS start_date,
    DATE '2019-07-31' AS end_date
)
SELECT
  t.user_name,
  t.start_date,
  t.end_date,
  day
FROM `project.dataset.table` AS t
-- Correlated UNNEST: the generated array is built from the current row of t.
CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(t.start_date, t.end_date)) AS day
ORDER BY user_name, day

with result

Row user_name   start_date  end_date    day  
1 A 2019-07-01 2019-07-31 2019-07-01
2 A 2019-07-01 2019-07-31 2019-07-02
3 A 2019-07-01 2019-07-31 2019-07-03
. . .
29 A 2019-07-01 2019-07-31 2019-07-29
30 A 2019-07-01 2019-07-31 2019-07-30
31 A 2019-07-01 2019-07-31 2019-07-31

Google BigQuery SQL: How to fill in gaps in a table with dates?

Consider below

-- Builds a continuous daily calendar per customer between that customer's
-- first and last date, then gives each day the most recent non-NULL
-- subscription at or before it.
WITH customer_span AS (
  -- First and last date present for each customer.
  SELECT
    customer,
    MIN(dates) AS min_date,
    MAX(dates) AS max_date
  FROM `project.dataset.table`
  GROUP BY customer
),
temp AS (
  -- One row per customer per day inside the customer's span.
  SELECT
    span.customer,
    dates
  FROM customer_span AS span
  CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(span.min_date, span.max_date)) AS dates
)
SELECT
  customer,
  dates,
  FIRST_VALUE(subscription IGNORE NULLS) OVER win AS subscription
FROM temp AS a
LEFT JOIN `project.dataset.table` AS b
  USING (customer, dates)
-- Dates are ordered newest-first, so "current row and everything following"
-- covers this day and all earlier days; the first non-NULL in that frame is
-- the latest known subscription.
WINDOW win AS (
  PARTITION BY customer
  ORDER BY dates DESC
  ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
)
-- No ORDER BY is applied; add one on (dates, customer) if row order matters.

If applied to the sample data in your question, the output is

Sample Image

How to fill missing values for missing dates with value from date before in sql bigquery?

Consider below:

-- Fills missing dates per id between that id's first and last date, giving
-- each added date the last known non-NULL price for the id.
WITH days_by_id AS (
  -- Array of every calendar day between the id's earliest and latest date.
  SELECT
    id,
    GENERATE_DATE_ARRAY(MIN(date), MAX(date)) AS days
  FROM sample
  GROUP BY id
)
SELECT
  date,
  id,
  COALESCE(
    price,
    LAST_VALUE(price IGNORE NULLS) OVER (
      PARTITION BY id
      ORDER BY date
      -- The default frame for an ordered window, written out explicitly.
      RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )
  ) AS price
FROM days_by_id
CROSS JOIN UNNEST(days_by_id.days) AS date
LEFT JOIN sample
  USING (id, date);

output :

Sample Image

BigQuery: repeating a date array for a list of values

Consider below

-- Pairs every partner with every quarter of 2021 and collects the quarters
-- into one array of (year, quarter) structs per partner.
WITH dates AS (
  -- One row per quarter; quarter_start is the first generated date of each step.
  SELECT
    EXTRACT(YEAR FROM quarter_start) AS year,
    'Q' || CAST(EXTRACT(QUARTER FROM quarter_start) AS STRING) AS quarter
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE '2021-01-01', DATE '2021-12-31', INTERVAL 1 QUARTER)
  ) AS quarter_start
),
partners AS (
  SELECT partner
  FROM UNNEST(["Partner 1", "Partner 2"]) AS partner
)
SELECT
  partner,
  ARRAY_AGG(STRUCT(year, quarter)) AS calendar
FROM partners
CROSS JOIN dates
GROUP BY partner

with output

Sample Image

PIVOTing BigQuery tables on DATE type

From Rules for pivot_column:

A pivot_column must be a constant.

DATE('2020-01-01') is an expression, not a constant, so you need to use one of the following.

-- Three alternative ways to write the PIVOT clause; `...` stands for the
-- remaining pivot values, so these are fragments rather than runnable statements.
PIVOT(sum(sales) FOR dates IN (DATE '2020-01-01', ...) -- explicit DATE literal
-- or
PIVOT(sum(sales) FOR dates IN ('2020-01-01', ...) -- string literal implicitly coerced to DATE type
-- or
PIVOT(sum(sales) FOR dates IN (DATE('2020-01-01') AS _2020_01_01, ...) -- DATE(...) call given an explicit alias
Dynamic SQL Example
-- Builds the PIVOT ... IN (...) list at run time instead of hard-coding it:
--   GENERATE_DATE_ARRAY produces the dates,
--   TO_JSON_STRING serializes that array to text,
--   TRIM(..., '[]') strips the surrounding square brackets,
--   FORMAT substitutes the result for %s, and EXECUTE IMMEDIATE runs the
--   resulting statement.
-- NOTE(review): presumably the substituted text is a comma-separated list of
-- double-quoted date strings that are coerced to DATE inside IN (...) - confirm.
EXECUTE IMMEDIATE FORMAT("""
SELECT * FROM Produce
PIVOT (SUM(sales) FOR dates IN (%s))
""", TRIM(TO_JSON_STRING(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-4')), '[]'));

Sample Image

  • EXECUTE IMMEDIATE


Related Topics



Leave a reply



Submit