Redshift Split Single Dynamic Column into Multiple Rows in New Table

Here is the Redshift answer. It will work with up to 9,999 segment ID values per row (the four-digit number generator below covers positions 1 through 9,999).

test data

create table test_split (uid varchar(50),segmentids varchar(max));
insert into test_split
values
('f9b6d54b-c646-4bbb-b0ec','4454918|4455158|4455638|4455878|4455998'),
('asd7a0s9-c646-asd7-b0ec','1265899|1265923|1265935|1266826|1266596'),
('asd7345s9-c646-asd7-b0ec','1235935|1263456|1265675696'),
('as345a0s9-c646-asd7-b0ec','12765899|12658883|12777935|144466826|1266226|12345')
;
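
Before running the full query, you can sanity-check the per-row segment counts that drive the splitter step below:

-- segments per row = number of pipe delimiters + 1
select uid, regexp_count(segmentids, '\\|') + 1 as num_segments
from test_split;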

code

with ten_numbers as (select 1 as num union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9 union select 0)
, generated_numbers AS
(
SELECT (1000 * t1.num) + (100 * t2.num) + (10 * t3.num) + t4.num AS gen_num
FROM ten_numbers AS t1
JOIN ten_numbers AS t2 ON 1 = 1
JOIN ten_numbers AS t3 ON 1 = 1
JOIN ten_numbers AS t4 ON 1 = 1
)
, splitter AS
(
SELECT *
FROM generated_numbers
WHERE gen_num BETWEEN 1 AND (SELECT max(REGEXP_COUNT(segmentids, '\\|') + 1)
FROM test_split)
)
--select * from splitter;
, expanded_input AS
(
SELECT
uid,
split_part(segmentids, '|', s.gen_num) AS segment
FROM test_split AS ts
JOIN splitter AS s ON 1 = 1
WHERE split_part(segmentids, '|', s.gen_num) <> ''
)
SELECT * FROM expanded_input;

The first two CTE steps (ten_numbers and generated_numbers) generate 10,000 numbered rows (0-9999). This is needed because generate_series is not supported on Redshift compute nodes.
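
For illustration, generate_series is a leader-node-only function in Redshift: it works standalone but errors out once the query references a user table (exact behavior may vary by cluster version):

-- works: executes entirely on the leader node
select * from generate_series(1, 5);

-- fails: generate_series can't be combined with a user (compute-node) table
select ts.uid
from test_split ts, generate_series(1, 5);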

The next step (splitter) keeps only the rows numbered from 1 up to the maximum number of delimiters + 1 (which is the maximum number of segments in any row).

Finally, we cross join splitter with the input data, pick out each value with split_part, and then exclude blank parts (which occur wherever a row has fewer than the maximum number of segments).
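
The question title asks for the result in a new table; the whole query can be wrapped in a CREATE TABLE ... AS (the table name test_split_rows is just a placeholder):

create table test_split_rows as
with ten_numbers as (select 1 as num union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9 union select 0)
, generated_numbers as
(
select (1000 * t1.num) + (100 * t2.num) + (10 * t3.num) + t4.num as gen_num
from ten_numbers as t1
join ten_numbers as t2 on 1 = 1
join ten_numbers as t3 on 1 = 1
join ten_numbers as t4 on 1 = 1
)
, splitter as
(
select *
from generated_numbers
where gen_num between 1 and (select max(regexp_count(segmentids, '\\|') + 1) from test_split)
)
select uid, split_part(segmentids, '|', s.gen_num) as segment
from test_split as ts
join splitter as s on 1 = 1
where split_part(segmentids, '|', s.gen_num) <> '';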

Split values over multiple rows in Redshift

Here is the Redshift answer. It will work with up to 9,999 values per row, using the same number-generator approach as above.

Set up test data

create table test_data (key varchar(50),data varchar(max));
insert into test_data
values
(1,'18,20,22'),
(2,'17,19')
;

code

with ten_numbers as (select 1 as num union select 2 union select 3 union select 4 union select 5 union select 6 union select 7 union select 8 union select 9 union select 0)
, generated_numbers AS
(
SELECT (1000 * t1.num) + (100 * t2.num) + (10 * t3.num) + t4.num AS gen_num
FROM ten_numbers AS t1
JOIN ten_numbers AS t2 ON 1 = 1
JOIN ten_numbers AS t3 ON 1 = 1
JOIN ten_numbers AS t4 ON 1 = 1
)
, splitter AS
(
SELECT *
FROM generated_numbers
WHERE gen_num BETWEEN 1 AND (SELECT max(REGEXP_COUNT(data, '\\,') + 1)
FROM test_data)
)
, expanded_input AS
(
SELECT
key,
split_part(data, ',', s.gen_num) AS data
FROM test_data AS td
JOIN splitter AS s ON 1 = 1
WHERE split_part(data, ',', s.gen_num) <> ''
)
SELECT * FROM expanded_input
order by key,data;
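
For the two test rows, the output should be one row per value:

key | data
----+-----
1   | 18
1   | 20
1   | 22
2   | 17
2   | 19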

Convert comma delimited values in a column into rows

If you know the maximum number of values, I think you can use split_part():

select t.store, t.location, split_part(products, ',', n.n) as product
from t join
(select 1 as n union all
select 2 union all
select 3 union all
select 4
) n
on split_part(products, ',', n.n) <> '';
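
If the maximum isn't known up front, one sketch is to generate the position numbers with row_number() over any sufficiently large existing table (big_enough_table here is a placeholder that must have at least as many rows as the longest list):

select t.store, t.location, split_part(t.products, ',', n.n) as product
from t
join (select row_number() over () as n from big_enough_table limit 100) n
on split_part(t.products, ',', n.n) <> '';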

You can also use:

select t.store, t.location, split_part(products, ',', 1) as product
from t
union all
select t.store, t.location, split_part(products, ',', 2) as product
from t
where split_part(products, ',', 2) <> ''
union all
select t.store, t.location, split_part(products, ',', 3) as product
from t
where split_part(products, ',', 3) <> ''
union all
select t.store, t.location, split_part(products, ',', 4) as product
from t
where split_part(products, ',', 4) <> ''
union all
. . .

How to write a Redshift AWS query to search for a value in comma delimited values

The simple way isn't always the best. There are a number of corner cases that can arise here (e.g., are all country codes two letters?). That said, a LIKE clause would be simple:

select tb1.user_id, valid_country as country_code
from table1 tb1, table2 tb2
where tb1.user_id=tb2.user_id
and tb1.country_code like '%'||tb2.valid_country||'%'

Or if we are to put this in modern SQL syntax:

select tb1.user_id, valid_country as country_code
from table1 tb1 join table2 tb2
on tb1.user_id=tb2.user_id
and tb1.country_code like '%'||tb2.valid_country||'%'
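
One of those corner cases: '%'||'US'||'%' also matches inside 'AUS'. If country_code is a comma-delimited list with no spaces around the commas, anchoring the match on delimiters avoids that (a sketch under that assumption):

select tb1.user_id, tb2.valid_country as country_code
from table1 tb1 join table2 tb2
on tb1.user_id = tb2.user_id
and ',' || tb1.country_code || ',' like '%,' || tb2.valid_country || ',%'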

Split json-format column into multiple columns in Redshift

I assume that these are strings in Redshift that contain JSON (not the newer SUPER data type). Redshift has several JSON parsing functions - https://docs.aws.amazon.com/redshift/latest/dg/json-functions.html

I think you want to use the json_extract_path_text() function - https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html

select json_extract_path_text(feature_column, 'reporting') as reporting from ...
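
A fuller sketch, with made-up column contents, key names, and table name: each key you want as a column becomes its own json_extract_path_text() call.

-- assuming feature_column holds strings like
-- '{"reporting": "enabled", "alerts": "disabled"}'
select
json_extract_path_text(feature_column, 'reporting') as reporting,
json_extract_path_text(feature_column, 'alerts') as alerts
from feature_table; -- hypothetical table name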

