Update Values in Struct Arrays in BigQuery

Update values in struct arrays in BigQuery

Below is for BigQuery Standard SQL

#standardSQL
-- Rebuild the weights array of every row: elements whose animal is 'dragon'
-- are renamed to 'cat'; the value field is carried over unchanged.
UPDATE `project.dataset.table` AS t
SET weights = ARRAY(
    SELECT AS STRUCT
      CASE WHEN w.animal = 'dragon' THEN 'cat' ELSE w.animal END AS animal,
      w.value
    FROM UNNEST(t.weights) AS w
  )
WHERE TRUE

Update a struct or nested field in BigQuery

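This answer states that BigQuery does not support updating an individual field of a struct or nested record. (This may be outdated: the current BigQuery DML documentation includes an example that updates nested record fields using dotted paths, so check it before relying on this claim.) The syntax the answer describes as unsupported is: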

-- Assigns a single field (name) of the Employee STRUCT via a dotted path.
UPDATE myTable
SET
  Employee.name = 'some string'
WHERE
  id = 10

We can update structs only as a whole:

-- Overwrites the entire Employee STRUCT of the row with id 10 in one assignment.
UPDATE myTable
SET
  Employee = STRUCT('aaa', 'bbb')
WHERE
  id = 10

How do I update a field in the array of struct in BQ?

You need to replace the array itself, updating the desired value:

-- Replace the whole address array, changing only the city field of matching
-- elements. SELECT AS STRUCT ... REPLACE keeps every element a STRUCT with all
-- of its other fields intact, so the rebuilt array keeps the element type of
-- the column instead of collapsing to an array of bare strings.
UPDATE `mydataset.struct_3`
SET address = ARRAY(
    SELECT AS STRUCT
      a.* REPLACE (IF(a.city = "some_city", "some_uscity", a.city) AS city)
    FROM UNNEST(address) AS a WITH OFFSET AS pos
    ORDER BY pos  -- keep the original element order
  )
WHERE TRUE  -- BigQuery requires a WHERE clause on UPDATE; TRUE targets every row

Update Struct in BigQuery

Looking closely at the error message, the target column type is a plain STRUCT, not an ARRAY of STRUCTs:

 STRUCT<string STRING, text STRING, provided STRING>

So, as you hinted, the UNNEST is not necessary. Try updating the STRUCT directly:

-- Rebuild the click_url STRUCT for the row with id 10: the first two fields
-- are copied from the current value and the third is replaced.
-- The STRUCT(...) constructor with named fields yields a flat three-field
-- struct. Wrapping the list in parentheses directly after SELECT AS STRUCT
-- would instead be read as one struct-valued column, i.e. a struct nested
-- inside a struct, which does not match the column type.
UPDATE `myTable` t
SET click_url = STRUCT(
    t.click_url.string AS `string`,
    t.click_url.text AS `text`,
    'your_hard_coded_provided_string' AS provided  -- placeholder: substitute the real value
  )
WHERE id = 10

How to update a field in a nested array in BigQuery?

Below is for BigQuery Standard SQL

#standardSQL
-- Rewrites trans.accounts for every order. Each account takes the region of
-- the relocations entry with the same account_id when that entry supplies a
-- non-NULL region, and keeps its current region otherwise. All other fields of
-- trans and of each account are carried over unchanged.
UPDATE `project.dataset.orders`
SET trans = (
    SELECT AS STRUCT
      trans.* REPLACE (
        ARRAY(
          SELECT AS STRUCT
            acct.* REPLACE (COALESCE(moved.region, acct.region) AS region)
          FROM UNNEST(trans.accounts) AS acct
          LEFT JOIN UNNEST(relocations) AS moved
            ON acct.account_id = moved.account_id
        ) AS accounts
      )
  )
FROM (
  -- Collapse the relocations table into one array so it can be joined per order.
  SELECT ARRAY_AGG(r) AS relocations
  FROM `project.dataset.relocations` AS r
)
WHERE TRUE

It was tested with the dummy data below.

The initial data looks like this:

[
{
"order_id": "order_id1",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "region1",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "region2",
"amount": "12"
}
]
}
},
{
"order_id": "order_id2",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "region4",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
}
]

After applying the following adjustments (the contents of the relocations table):

[
{
"account_id": "account_id1",
"region": "regionA"
},
{
"account_id": "account_id2",
"region": "regionB"
}
]

the result is:

[
{
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "regionA",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "regionB",
"amount": "12"
}
]
},
{
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "regionA",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
]

How do I change the values in the array of struct in BQ



#standardSQL
-- Preview only (no data is modified): for each box, return the colors array
-- with every 'yellow' element rewritten to colour 'blue' and id 5.
SELECT
  boxes,
  ARRAY(
    SELECT AS STRUCT
      CASE WHEN c.colour = 'yellow' THEN 'blue' ELSE c.colour END AS colour,
      CASE WHEN c.colour = 'yellow' THEN 5 ELSE c.id END AS id
    FROM UNNEST(colors) AS c
  ) AS colors
FROM `mydataset.struct_4`

A variation of the above would be:

#standardSQL
-- Preview only: same result shape, but the whole element is swapped at once.
-- A 'yellow' element becomes the typed struct ('blue', 5); any other element is
-- rebuilt from its own colour and id.
SELECT
  boxes,
  ARRAY(
    SELECT
      IF(
        colour = 'yellow',
        STRUCT<colour STRING, id INT64>('blue', 5),
        STRUCT(colour, id)
      )
    FROM UNNEST(colors)
  ) AS colors
FROM `mydataset.struct_4`

with the same output of course

Row boxes   colors.colour   colors.id    
1 box_1 brown 1
green 3
white 7
2 box_2 blue 5
white 4

Update, in response to the follow-up question "But how do I update the colour 'yellow' to 'blue' and its id to 5 using an UPDATE DML statement?":

#standardSQL
-- Persist the replacement: in every row, each 'yellow' element of colors is
-- replaced by the struct ('blue', 5); all other elements are rebuilt as-is.
UPDATE `mydataset.struct_4`
SET colors = ARRAY(
    SELECT
      IF(
        colour = 'yellow',
        STRUCT<colour STRING, id INT64>('blue', 5),
        STRUCT(colour, id)
      )
    FROM UNNEST(colors)
  )
WHERE TRUE

BigQuery update / insert in nested arrays and arrays of structs

I was finally able to solve the problem.
To merge the two sets of records, I had to resort to subqueries that do part of the work. That said, I still think this code could be improved.

    -- INSERT IDs
-- Add every distinct staged people_id that main_table does not contain yet.
-- NOT EXISTS is used rather than NOT IN: with NOT IN, a single NULL people_id
-- in main_table makes the predicate NULL for every staged row, so nothing
-- would be inserted.
INSERT `deep_test.main_table` (people_id)
SELECT DISTINCT s.people_id
FROM `deep_test.staging_test` AS s
WHERE s.people_id IS NOT NULL  -- NULL ids are never inserted
  AND NOT EXISTS (
    SELECT 1
    FROM `deep_test.main_table` AS m
    WHERE m.people_id = s.people_id
  );

-- UPDATE TALENT RECORD
-- Overwrites main_table.talent with an array built from staging_test.
-- Only people_ids that have at least one non-NULL staged talent are touched,
-- because the final WHERE joins on the ids produced by subquery B.
UPDATE
`deep_test.main_table` gold
SET
talent = B.talent
FROM
(
-- B: one row per people_id, with the selected staged talent values
-- collected into a single array.
SELECT
gold.people_id as people_id,
ARRAY_AGG(aggregated_stage.talent) as talent
FROM
`deep_test.main_table` gold
JOIN
(
-- aggregated_stage: people_id and talent of the most recent staged row
-- in each group.
SELECT
A.people_id,
A.talent
FROM
(
-- Within each (people_id, talent.people_l_id, talent.fiscalYear) group,
-- keep only the row with the latest createdAt: ARRAY_AGG ... LIMIT 1
-- builds a one-element array and [OFFSET(0)] extracts that row as A.
SELECT
ARRAY_AGG( t
ORDER BY
t.createdAt DESC LIMIT 1 )[OFFSET(0)] A
FROM
`deep_test.staging_test` t
GROUP BY
t.people_id,
t.talent.people_l_id,
t.talent.fiscalYear
)
) as aggregated_stage
-- Inner join: staged ids that are absent from main_table are dropped here.
ON gold.people_id = aggregated_stage.people_id
WHERE aggregated_stage.talent is not null
GROUP BY people_id
)
B
WHERE
-- Correlate each target row with its aggregated array.
B.people_id = gold.people_id;

-- UPDATE COUNTRY CODE
-- For each core row, take the country of the talent element with the highest
-- fiscalYear, look it up in countries by code, and store the matching
-- countries.number as country_code.
UPDATE `deep_test.core` core
SET core.country_code = countries.number
FROM (
  SELECT
    people_id,
    (
      SELECT country
      FROM UNNEST(talent) AS d
      ORDER BY d.fiscalYear DESC
      LIMIT 1
    ) AS country
  FROM `deep_test.core`
) B
INNER JOIN `deep_test.countries` countries
  ON countries.code = B.country
WHERE core.people_id = B.people_id;

This creates a subquery and gives its result an alias (B above). That alias can then be used like a table, for querying and for joining the results with another table.



Related Topics



Leave a reply



Submit