Update Values in Struct Arrays in Bigquery

Update values in struct arrays in BigQuery

Below is for BigQuery Standard SQL

#standardSQL
-- Rebuild the `weights` array of every row: each rebuilt element carries
-- `animal` and `value`, with the animal 'dragon' renamed to 'cat'.
UPDATE `project.dataset.table` AS t
SET weights = ARRAY(
  SELECT AS STRUCT
    CASE WHEN w.animal = 'dragon' THEN 'cat' ELSE w.animal END AS animal,
    w.value
  FROM UNNEST(t.weights) AS w
)
-- WHERE TRUE makes the statement apply to all rows of the table.
WHERE TRUE

Update struct or nested field in bigquery

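Unfortunately, BigQuery does not support updating individual fields of a struct or a nested field (note: check the current BigQuery DML reference, which documents dotted SET targets for nested STRUCT fields, as this limitation may no longer apply). The following syntax is not supported: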

-- Dotted SET target addressing a single struct field (Employee.name) of the
-- row with id = 10.
-- NOTE(review): the accompanying text presents this form as unsupported;
-- confirm against the current BigQuery DML reference before relying on that.
UPDATE myTable
SET Employee.name = 'some string'
WHERE id = 10

We can update structs only as a whole:

-- Replace the entire Employee struct of the row with id = 10 in one
-- assignment; STRUCT('aaa', 'bbb') supplies the complete new value.
UPDATE myTable
SET
  Employee = STRUCT('aaa', 'bbb')
WHERE
  id = 10

How do I update a field in the array of struct in BQ?

You need to replace the array itself, updating the desired value:

-- Rewrite the `address` array in place: only `city` changes, from
-- "some_city" to "some_uscity".
-- Assumes `address` is an array of structs that have a `city` field, as the
-- question describes -- TODO confirm against the table schema.
UPDATE `mydataset.struct_3`
SET address = ARRAY(
  -- SELECT AS STRUCT ... REPLACE keeps each element a struct and preserves
  -- its other fields; selecting the bare IF(...) would instead produce an
  -- array of plain strings, which does not match an array-of-struct column.
  SELECT AS STRUCT
    addr.* REPLACE (IF(addr.city = "some_city", "some_uscity", addr.city) AS city)
  FROM UNNEST(address) AS addr
)
-- BigQuery rejects an UPDATE without a WHERE clause; TRUE targets all rows.
WHERE TRUE

Update Struct in BigQuery

Looking closely at the error message, the target column type is a plain STRUCT, not an ARRAY of STRUCTs:

 STRUCT<string STRING, text STRING, provided STRING>

So, as you hinted, the UNNEST is not necessary. Try updating the STRUCT directly:

-- Overwrite the `click_url` struct of the row with id = 10 as a whole: the
-- first two fields are carried over from the current value and the third
-- (`provided`) is replaced.
-- A typed STRUCT constructor is used instead of `SELECT AS STRUCT(a, b, c)`:
-- the parenthesised list there is read as one tuple-valued column, which
-- yields a struct wrapped inside another struct rather than the three-field
-- STRUCT<string STRING, text STRING, provided STRING> the column expects.
UPDATE `myTable` AS t
SET click_url = STRUCT<string STRING, text STRING, provided STRING>(
  t.click_url.string,
  t.click_url.text,
  -- Placeholder: substitute the STRING value to store (e.g. a quoted literal).
  your_hard_coded_provided_string
)
WHERE id = 10

How to update a field in a nested array in Bigquery?

Below is for BigQuery Standard SQL

#standardSQL
-- Rebuild `trans` for every order, replacing only its `accounts` array.
-- Each account takes the region listed for its account_id in the
-- relocations table; accounts with no matching relocation keep their region.
UPDATE `project.dataset.orders`
SET trans = (
  SELECT AS STRUCT
    trans.* REPLACE (
      ARRAY(
        SELECT AS STRUCT
          acct.* REPLACE (IFNULL(reloc.region, acct.region) AS region)
        FROM UNNEST(trans.accounts) AS acct
        LEFT JOIN UNNEST(relocations) AS reloc
          USING (account_id)
      ) AS accounts
    )
)
-- The whole relocations table is folded into one array, so this FROM clause
-- contributes a single row that is available to every order row.
FROM (
  SELECT ARRAY_AGG(r) AS relocations
  FROM `project.dataset.relocations` AS r
)
WHERE TRUE

It was tested with the dummy data below.

The initial dummy data looks like this:

[
  {
    "order_id": "order_id1",
    "order_time": "2019-06-28 01:05:16.346854 UTC",
    "trans": {
      "id": "id1",
      "amount": "1",
      "accounts": [
        {
          "role": "role1",
          "account_id": "account_id1",
          "region": "region1",
          "amount": "11"
        },
        {
          "role": "role2",
          "account_id": "account_id2",
          "region": "region2",
          "amount": "12"
        }
      ]
    }
  },
  {
    "order_id": "order_id2",
    "order_time": "2019-06-28 01:05:16.346854 UTC",
    "trans": {
      "id": "id2",
      "amount": "1",
      "accounts": [
        {
          "role": "role3",
          "account_id": "account_id1",
          "region": "region4",
          "amount": "13"
        },
        {
          "role": "role4",
          "account_id": "account_id3",
          "region": "region3",
          "amount": "14"
        }
      ]
    }
  }
]

After applying the adjustments below:

[
  {
    "account_id": "account_id1",
    "region": "regionA"
  },
  {
    "account_id": "account_id2",
    "region": "regionB"
  }
]

the result is:

[
  {
    "id": "id1",
    "amount": "1",
    "accounts": [
      {
        "role": "role1",
        "account_id": "account_id1",
        "region": "regionA",
        "amount": "11"
      },
      {
        "role": "role2",
        "account_id": "account_id2",
        "region": "regionB",
        "amount": "12"
      }
    ]
  },
  {
    "id": "id2",
    "amount": "1",
    "accounts": [
      {
        "role": "role3",
        "account_id": "account_id1",
        "region": "regionA",
        "amount": "13"
      },
      {
        "role": "role4",
        "account_id": "account_id3",
        "region": "region3",
        "amount": "14"
      }
    ]
  }
]

How do I change the values in the array of struct in BQ

#standardSQL
-- Return each box with a rebuilt `colors` array in which every 'yellow'
-- entry becomes colour 'blue' with id 5; other entries pass through as-is.
SELECT
  boxes,
  ARRAY(
    SELECT AS STRUCT
      CASE WHEN c.colour = 'yellow' THEN 'blue' ELSE c.colour END AS colour,
      CASE WHEN c.colour = 'yellow' THEN 5 ELSE c.id END AS id
    FROM UNNEST(colors) AS c
  ) AS colors
FROM `mydataset.struct_4`

Variation of above would be

#standardSQL
-- Same result as swapping the fields one by one, but the whole element is
-- chosen at once: a fixed ('blue', 5) struct for 'yellow' entries, otherwise
-- a struct built from the element's own colour and id.
SELECT
  boxes,
  ARRAY(
    SELECT
      CASE
        WHEN c.colour = 'yellow' THEN STRUCT<colour STRING, id INT64>('blue', 5)
        ELSE STRUCT(c.colour, c.id)
      END
    FROM UNNEST(colors) AS c
  ) AS colors
FROM `mydataset.struct_4`

with the same output of course

Row boxes   colors.colour   colors.id    
1   box_1   brown           1    
            green           3    
            white           7    
2   box_2   blue            5    
            white           4

Update for: but how do I update the colour "yellow" to "blue" and its id to 5 using UPDATE DML statement :)

#standardSQL
-- Persist the recolouring: rewrite `colors` on every row so that each
-- 'yellow' element becomes ('blue', 5) and all other elements are kept.
UPDATE `mydataset.struct_4`
SET colors = ARRAY(
  SELECT
    IF(
      c.colour = 'yellow',
      STRUCT<colour STRING, id INT64>('blue', 5),
      STRUCT(c.colour, c.id)
    )
  FROM UNNEST(colors) AS c
)
-- WHERE TRUE applies the rewrite to all rows.
WHERE TRUE

Bigquery update / insert in nested arrays and arrays of structs

I was finally able to nail the problem.
To merge two records, I had to resort to subqueries that do some of the work up front. That said, I still think there is room for improvement in this code.

    -- INSERT IDs
-- Add every distinct people_id found in staging_test that main_table does
-- not hold yet.
-- NOT EXISTS is used instead of NOT IN: with NOT IN, a single NULL
-- people_id in main_table turns the predicate into NULL for every staging
-- row, so nothing at all would be inserted.
INSERT INTO `deep_test.main_table` (people_id)
SELECT DISTINCT stage.people_id
FROM `deep_test.staging_test` AS stage
WHERE stage.people_id IS NOT NULL  -- never create a row with a NULL id
  AND NOT EXISTS (
    SELECT 1
    FROM `deep_test.main_table` AS existing
    WHERE existing.people_id = stage.people_id
  );

-- UPDATE TALENT RECORD
-- Sets main_table.talent to an array of talent values collected from
-- staging_test, matched on people_id. Reading from the inside out:
--   1. staging rows are grouped by (people_id, talent.people_l_id,
--      talent.fiscalYear) and only the row with the latest createdAt is
--      kept per group;
--   2. people_id and talent are taken from each surviving row;
--   3. those rows are joined to main_table on people_id, rows whose talent
--      is NULL are dropped, and the rest are aggregated into one array per
--      people_id;
--   4. the outer UPDATE writes that array to the matching main_table row.
UPDATE
  `deep_test.main_table`  gold
SET
  talent = B.talent
FROM
  (
    -- Step 3: one row per people_id carrying the aggregated talent array.
    SELECT
      gold.people_id as people_id,
      ARRAY_AGG(aggregated_stage.talent) as talent
    FROM
      `deep_test.main_table` gold
      JOIN
        (
          -- Step 2: unpack the kept staging row (A) into the two columns used.
          SELECT
            A.people_id,
            A.talent
          FROM
            (
              -- Step 1: ARRAY_AGG ordered by createdAt DESC with LIMIT 1,
              -- then [OFFSET(0)], yields the newest whole row per group as A.
              SELECT
                ARRAY_AGG( t
              ORDER BY
                t.createdAt DESC LIMIT 1 )[OFFSET(0)] A
              FROM
                `deep_test.staging_test` t
              GROUP BY
                t.people_id,
                t.talent.people_l_id,
                t.talent.fiscalYear
            )
        ) as aggregated_stage
        ON gold.people_id = aggregated_stage.people_id
        WHERE aggregated_stage.talent is not null
        GROUP BY people_id
  )
  B
-- Step 4: correlate the aggregated rows (B) with the table being updated.
WHERE
  B.people_id = gold.people_id;

-- UPDATE COUNTRY CODE
-- For each person in core, take the `country` of the talent entry with the
-- highest fiscalYear, look that value up in countries.code, and store the
-- matching countries.number in core.country_code.
UPDATE `deep_test.core` AS core
SET core.country_code = countries.number
FROM (
  SELECT
    people_id,
    (
      SELECT country
      FROM UNNEST(talent) AS d
      ORDER BY d.fiscalYear DESC
      LIMIT 1
    ) AS country
  FROM `deep_test.core`
) AS B
INNER JOIN `deep_test.countries` AS countries
  ON countries.code = B.country
WHERE core.people_id = B.people_id;

This creates a subquery and gives its result an alias. That alias can then be used like a table, for querying and for joining the results with another table.

Update Values in Struct Arrays in Bigquery