Google Bigquery Case Function

Google BigQuery CASE function

2013 update: BigQuery supports CASE:

SELECT CASE WHEN x=1 THEN 'one' WHEN x=2 THEN 'two' ELSE 'more' END 
FROM (SELECT 1 AS x)

'one'

Bigquery CASE function not working as expected

Use below instead

SELECT CASE 
WHEN REGEXP_CONTAINS(brand_name, r"pen|scissors") THEN 'Stationery'
WHEN REGEXP_CONTAINS(brand_name, r"chair|table") THEN 'Furniture'
WHEN REGEXP_CONTAINS(brand_name, r"cup|pot") THEN 'Household'
ELSE 'Stationery'
END AS brand_type
FROM table

As you can see, you had extra CASE which broke the logic.

Also you do not need to use =TRUE

Function names case sensitivity in BigQuery

What you've found is correct. Documentation has been updated to reflect it:

| Category                    | Case Sensitive? |
| Built-in Function names | No |
| User-Defined Function names | Yes |

How to create multiple 'THEN' clauses for a BigQuery standard SQL case statement?

Below is for BigQuery Standard SQL

First, let's correct your initial query so that it actually produces expected result

#standardSQL
SELECT id,
CASE
WHEN one_x1 = two_x1 AND one_x1 = three_x1 THEN CONCAT( object1_name, ", ", object2_name, ", ", object3_name )
WHEN one_x1 = two_x1 THEN CONCAT( object1_name, ", ", object2_name )
WHEN one_x1 = three_x1 THEN CONCAT( object1_name, ", ", object3_name )
WHEN two_x1 = three_x1 THEN CONCAT( object2_name, ", ", object3_name )
ELSE NULL
END AS field1,
CASE
WHEN one_x1 = two_x1 AND one_x1 = three_x1 THEN [score_one, score_two, score_three]
WHEN one_x1 = two_x1 THEN [score_one, score_two]
WHEN one_x1 = three_x1 THEN [score_one, score_three]
WHEN two_x1 = three_x1 THEN [score_two, score_three]
ELSE NULL
END AS field2
FROM `project.dataset.table`

If to apply to sample data from your question - result is

Row id  field1                  field2   
1 1 Dog, Animal 0.82
0.72
2 2 Horse, Animal, Bird 0.76
0.73
0.9
3 3 Dog, Animal, Chicken 0.67
0.75
0.65
4 4 Bird, Chicken 0.87
0.86

Next, as I understand, you want to avoid repeating same conditions again and again in your CASE - for this - you can use below trick

#standardSQL

SELECT id, fields.* FROM (
SELECT id,
CASE
WHEN one_x1 = two_x1 AND one_x1 = three_x1 THEN
STRUCT(CONCAT( object1_name, ", ", object2_name, ", ", object3_name) AS field1, [score_one, score_two, score_three] AS field2)
WHEN one_x1 = two_x1 THEN
STRUCT(CONCAT( object1_name, ", ", object2_name ) AS field1, [score_one, score_two] AS field2)
WHEN one_x1 = three_x1 THEN
STRUCT(CONCAT( object1_name, ", ", object3_name ) AS field1, [score_one, score_three] AS field2)
WHEN two_x1 = three_x1 THEN
STRUCT(CONCAT( object2_name, ", ", object3_name ) AS field1, [score_two, score_three] AS field2)
ELSE NULL
END AS fields
FROM `project.dataset.table`
)

obviously with the same output ...

And, finally, as yet another option for yo - you can eliminate all those case/when/then with below approach

#standardSQL
SELECT id,
(SELECT STRING_AGG(object) FROM UNNEST(objects) object WITH OFFSET
JOIN UNNEST(pos) OFFSET USING(OFFSET)
) field1,
(SELECT ARRAY_AGG(score) FROM UNNEST(scores) score WITH OFFSET
JOIN UNNEST(pos) OFFSET USING(OFFSET)
) field2
FROM (
SELECT id,
[object1_name, object2_name, object3_name] objects,
[score_one, score_two, score_three] scores,
(SELECT ARRAY_AGG(OFFSET)
FROM UNNEST([one_x1, two_x1, three_x1]) x WITH OFFSET
GROUP BY x HAVING COUNT(1) > 1
) pos
FROM `project.dataset.table`
)

again with the same output

SQL/BigQuery: case when statement over partition by

Consider below query.

if author_type is 'bot' and part_type is 'comment', then label it as 'bot' in a new column for all the rows of that conversation id.

SELECT *,
MAX(IF(author_type = 'bot' AND part_type = 'comment', 'bot', NULL)) OVER (PARTITION BY conv_id) AS tag
FROM sample_table
ORDER BY conv_id

Sample Image

BigQuery - CASE WHEN x IN (field from other table), results in Semijoin error

I suggest joining a summarized result along these lines:

SELECT 
CASE WHEN p.has_purchased = 1 THEN "Purchased"
ELSE "Never Purchased"
END AS Previous_Purchase
FROM existing_from_table f
LEFT JOIN (
SELECT fullVisitorId, 1 has_purchased
FROM [xxxx-0000:36363.unique_FullVisitorIds_of_past_purchasers_table]
GROUP BY fullVisitorId
) p on f.fullVisitorId = p.fullVisitorId

nb: The group by is used so that the overall number of rows is not affected.

Having a problem with Case When function in SQL

Below is for BigQuery Standard SQL

#standardSQL
SELECT
IF(
SAFE_CAST(i1 AS INT64) IS NULL,
i2,
IF(SAFE_CAST(i0 AS INT64) IS NULL, i1, NULL)
) ID_VARIABLE
FROM `project.dataset.table`,
UNNEST([STRUCT<i0 STRING, i1 STRING, i2 STRING>(
SPLIT(LINE_ITEM, '_')[OFFSET(0)],
SPLIT(LINE_ITEM, '_')[OFFSET(1)],
SPLIT(LINE_ITEM, '_')[OFFSET(2)]
)])

If to apply to dummy data from your question as below

#standardSQL
WITH `project.dataset.table` AS (
SELECT 'HM_9991256_RDR_LL_BA_NONE_REDDEAD' LINE_ITEM UNION ALL
SELECT 'HW_1005489_HWA_AA_DG_NONE_HELLOWORLD' UNION ALL
SELECT 'GG_8504807_BBA_CL_OI_NONE_GOODGAME' UNION ALL
SELECT '100_BB_4186482_CGA_IG_NONE_CALIFORNIA' UNION ALL
SELECT '200_HB_48558012_DDA_EV_NONE_EVEONLINE' UNION ALL
SELECT '900_GI_55488941_CA_DV_NONE_BATTLEFIELD'
)
SELECT
IF(
SAFE_CAST(i1 AS INT64) IS NULL,
i2,
IF(SAFE_CAST(i0 AS INT64) IS NULL, i1, NULL)
) ID_VARIABLE
FROM `project.dataset.table`,
UNNEST([STRUCT<i0 STRING, i1 STRING, i2 STRING>(
SPLIT(LINE_ITEM, '_')[OFFSET(0)],
SPLIT(LINE_ITEM, '_')[OFFSET(1)],
SPLIT(LINE_ITEM, '_')[OFFSET(2)]
)])

result will be as expected:

Row ID_VARIABLE  
1 9991256
2 1005489
3 8504807
4 4186482
5 48558012
6 55488941

If for some reason you are bound to CASE WHEN statement - below should work for you

#standardSQL   
SELECT
CASE WHEN REGEXP_CONTAINS(i1, r'[^0-9]') THEN REGEXP_REPLACE(i2, r'[^0-9]', '')
WHEN REGEXP_CONTAINS(i0, r'[^0-9]') THEN REGEXP_REPLACE(i1, r'[^0-9]', '')
END AS ID_VARIABLE
FROM `project.dataset.table`,
UNNEST([STRUCT<i0 STRING, i1 STRING, i2 STRING>(
SPLIT(LINE_ITEM, '_')[OFFSET(0)],
SPLIT(LINE_ITEM, '_')[OFFSET(1)],
SPLIT(LINE_ITEM, '_')[OFFSET(2)]
)])


Related Topics



Leave a reply



Submit