BigQuery UPDATE nested array field
Below is tested!
Leaves whole table's schema/data as is and only updates values of productCategoryAttribute based on the respective mapping
#standardSQL
UPDATE `project.dataset.your_table` t
SET hits =
ARRAY(
SELECT AS STRUCT * REPLACE(
ARRAY(
SELECT AS STRUCT product.* REPLACE(
CASE WHEN map.raw_name = product.productCategory THEN category
ELSE productCategoryAttribute END AS productCategoryAttribute)
FROM UNNEST(product) product
LEFT JOIN UNNEST(agg_map.map) map
ON map.raw_name = product.productCategory
) AS product)
FROM UNNEST(hits) hit
)
FROM (SELECT ARRAY_AGG(row) map FROM `project.dataset.map` row) agg_map
WHERE TRUE
Note: above solution assumes that map table is not that big as it relies on aggregating whole map table into one array
Updating nested array in BigQuery based on value in another column
Below is for BigQuery Standard SQL
See corrected version of very yours code
SET
event_params = ARRAY(
SELECT AS STRUCT * REPLACE(
CASE WHEN event_param.key = 'programName' THEN
(SELECT AS STRUCT * REPLACE('anonymous' AS string_value) FROM UNNEST([value]))
ELSE value
END AS value
)
FROM UNNEST(event_params) AS event_param
)
Also, note - you can remove reference to event_param
as in below example
SET
event_params = ARRAY(
SELECT AS STRUCT * REPLACE(
CASE WHEN key = 'programName' THEN
(SELECT AS STRUCT * REPLACE('anonymous' AS string_value) FROM UNNEST([value]))
ELSE value
END AS value
)
FROM UNNEST(event_params)
)
How to update a field in a nested array in Bigquery?
Below is for BigQuery Standard SQL
#standardSQL
UPDATE `project.dataset.orders`
SET trans = (SELECT AS STRUCT trans.* REPLACE(
ARRAY(SELECT AS STRUCT x.* REPLACE(IFNULL(y.region, x.region) AS region)
FROM UNNEST(trans.accounts) x
LEFT JOIN UNNEST(relocations) y
USING(account_id)
) AS accounts))
FROM (SELECT ARRAY_AGG(t) relocations FROM `project.dataset.relocations` t)
WHERE TRUE
It is tested with below dummy data
initial dummy data that looks like below
[
{
"order_id": "order_id1",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "region1",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "region2",
"amount": "12"
}
]
}
},
{
"order_id": "order_id2",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "region4",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
}
]
after applying below adjustments
[
{
"account_id": "account_id1",
"region": "regionA"
},
{
"account_id": "account_id2",
"region": "regionB"
}
]
result is
[
{
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "regionA",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "regionB",
"amount": "12"
}
]
},
{
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "regionA",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
]
Update nested field in BigQuery table
hits
is an array, so you need to use an array subquery to assign to it. It would look something like this:
#standardSQL
UPDATE `dataset_name`.`ga_sessions_20170705`
SET hits =
ARRAY(
SELECT AS STRUCT * REPLACE(
(SELECT AS STRUCT eventInfo.* REPLACE('some string' AS eventLabel)) AS eventInfo)
FROM UNNEST(hits)
)
WHERE TRUE;
Bigquery update / insert in nested arrays and arrays of structs
I was finally able to nail the problem.
To merge 2 records, I had to resort to subqueries pushing in some work. Although, I still think there are chances of improvement to this code.
-- INSERT IDs
INSERT `deep_test.main_table` (people_id)
(
SELECT distinct(people_id) FROM `deep_test.staging_test`
WHERE people_id NOT IN ( SELECT people_id FROM `deep_test.main_table` )
);
-- UPDATE TALENT RECORD
UPDATE
`deep_test.main_table` gold
SET
talent = B.talent
FROM
(
SELECT
gold.people_id as people_id,
ARRAY_AGG(aggregated_stage.talent) as talent
FROM
`deep_test.main_table` gold
JOIN
(
SELECT
A.people_id,
A.talent
FROM
(
SELECT
ARRAY_AGG( t
ORDER BY
t.createdAt DESC LIMIT 1 )[OFFSET(0)] A
FROM
`deep_test.staging_test` t
GROUP BY
t.people_id,
t.talent.people_l_id,
t.talent.fiscalYear
)
) as aggregated_stage
ON gold.people_id = aggregated_stage.people_id
WHERE aggregated_stage.talent is not null
GROUP BY people_id
)
B
WHERE
B.people_id = gold.people_id;
-- UPDATE COUNTRY CODE
UPDATE `deep_test.core` core
set core.country_code = countries.number
FROM
(
select people_id , (select country from UNNEST(talent) as d order by d.fiscalYear DESC limit 1) as country FROM `deep_test.core`
) B, `deep_test.countries` countries
WHERE
core.people_id = B.people_id
AND countries.code = B.country;
This creates a subquery and assigns the results to a variable. This variable can be used as a table in for querying and joining the results with another table.
Update struct or nested field in bigquery
Unfortunately BigQuery does not support updating separate fields of a query or a nested field. The following syntax is not supported:
UPDATE myTable
SET Employee.name = 'some string'
WHERE id = 10
We can update structs only as a whole:
UPDATE myTable
SET Employee = STRUCT('aaa', 'bbb')
WHERE id = 10
Google BigQuery - Updating a nested repeated field
This should help get you started. You need to include everything else from hits
in order to perform the update, including the nested page
.
UPDATE `you_dataset.tablename`
SET hits = ARRAY(
SELECT AS STRUCT * REPLACE (
(SELECT AS STRUCT page.* REPLACE ('foo' AS pagePath)) AS page
)
FROM UNNEST(hits) as pagePath
)
WHERE fullVisitorID like "%1%"
Update a nested field in BigQuery using another nested field as a condition
Your WHERE clause should be like below
WHERE EXISTS (
SELECT 1 FROM UNNEST(hits) AS h
WHERE h.eventInfo.eventCategory LIKE '%SEARCH%'
)
BigQuery UPDATE of column within nest from GoogleAnalytics export
You're so close with the third query. You should recreate the hits column, but by preserving the original data structure.
In the query below, I get all the hit rows and replace their sourcePropertyInfo key in the struct.
Then, since I unnested the hits, to gather it again, I used array_agg so it becomes an array again.
update `my_project.my_dataset.ga_sessions_20220801`
set hits =
(
select array_agg(t)
from (
select
hit.* replace(
struct(
hit.sourcePropertyInfo.sourcePropertyDisplayName,
'some value' as sourcePropertyTrackingId
) as sourcePropertyInfo
)
from unnest(hits) as hit
) as t
)
where true
Related Topics
Oracle Combine Several Columns into One
Oracle Ora-00979 - "Not a Group by Expression"
Pivot Table with Non-Cardinal Values
Updating Row with Subquery Returning Multiple Rows
Ora-00936: Missing Expression Oracle
Select Rownum from Salary Where Rownum=3;
Table as an Argument of a Postgresql Function
How to Partition Postgres Table Using Intermediate Table
Select Last Records from Table Using Group By
How to Obtain Unique Results from a Select with Joined Records
How to Write a Simple Database Engine
What Are Some of Your Most Useful Database Standards
Query for Searching the Name Alphabetically
How to Emulate Repeat() in SQLite
Postgresql 9.4 - Prevent App Selecting Always the Latest Updated Rows
Problem with Alter Then Update in Try Catch with Tran Using Transact-Sql