Oracle -- Split Multiple Comma Separated Values in Oracle Table to Multiple Rows

Split comma separated values into multiple rows - Oracle SQL

Here's one option; you need lines 6 onwards.

SQL> with table1 (test_type, h_level, hadoop) as
2 (select '22RETGGEDGDD, RRMMNFNEDGDD', 'Control Directory 1', 'Interchange Control Header' from dual
3 union all
4 select '22RETGGEDGDD, RRMMNFNEDGDD', 'Control Directory 2', 'Interchange Control Header' from dual
5 )
6 select trim(regexp_substr(test_type, '[^,]+', 1, column_value)) test_type,
7 h_level,
8 hadoop
9 from table1 join table(cast(multiset(select level from dual
10 connect by level <= regexp_count(test_type, ',') + 1
11 ) as sys.odcinumberlist )) on 1 = 1
12 order by 2, 1 desc, 3;

TEST_TYPE H_LEVEL HADOOP
-------------------- ------------------- --------------------------
22RETGGEDGDD Control Directory 1 Interchange Control Header
RRMMNFNEDGDD Control Directory 1 Interchange Control Header
22RETGGEDGDD Control Directory 2 Interchange Control Header
RRMMNFNEDGDD Control Directory 2 Interchange Control Header

SQL>

Multiple row data in single row as comma separated (Oracle)

This is what LISTAGG was built for

  SELECT t.USER, t.group, LISTAGG (t.sub, ',') WITHIN GROUP (ORDER BY t.pk_id)
FROM your_table t
GROUP BY t.USER, t.GROUP

oracle -- Split multiple comma separated values in oracle table to multiple rows

Finally I came up with this answer

WITH CTE AS (SELECT 'a,b,c,d,e' temp, 1 slno FROM DUAL
UNION
SELECT 'f,g' temp, 2 slno FROM DUAL
UNION
SELECT 'h' temp, 3 slno FROM DUAL)
SELECT TRIM(REGEXP_SUBSTR(temp, '[^,]+', 1, level)), slno
FROM CTE
CONNECT BY level <= REGEXP_COUNT(temp, '[^,]+')
AND PRIOR slno = slno
AND PRIOR DBMS_RANDOM.VALUE IS NOT NULL

How to split comma-separated values into multiple rows in Oracle table


Step 1 : "How to blow up a database"

From :

SQL Fiddle

Oracle 11g R2 Schema Setup:

Query 1:

select * from films11

Results:

| YEAR | DIRECTOR | MOVIETITLE |      ACTORNAME |
|------|----------|------------|----------------|
| 2000 | dir1 | title1 | act1,act2 |
| 2001 | dir2 | title2 | act1,act2,act3 |
| 2002 | dir1 | title3 | act4 |

Query 2:

select YT.year, YT.movietitle,
REPLACE(REGEXP_SUBSTR(YT.actorname||',','.*?,',1,lvl.lvl),',','') AS actorname
from films11 YT
join (select level as lvl
from dual
connect by level <= (select max(regexp_count(actorname,',')+1) from films11)
) lvl on lvl.lvl <= regexp_count(YT.actorname,',')+1
order by YT.year, YT.movietitle, actorname

With a nice Cartesian product :

Results:

| YEAR | MOVIETITLE | ACTORNAME |
|------|------------|-----------|
| 2000 | title1 | act1 |
| 2000 | title1 | act2 |
| 2001 | title2 | act1 |
| 2001 | title2 | act2 |
| 2001 | title2 | act3 |
| 2002 | title3 | act4 |

You run it ONCE and use it to move everything to a normalized DB


Here is the full script to change your schema to something more convenient...

CREATE TABLE actors(
id_actor NUMBER GENERATED BY DEFAULT ON NULL AS IDENTITY,
act_name VARCHAR2(100)
)
;

CREATE TABLE directors(
id_director NUMBER GENERATED BY DEFAULT ON NULL AS IDENTITY,
dir_name VARCHAR2(100)
)
;

CREATE TABLE movies(
id_movie NUMBER GENERATED BY DEFAULT ON NULL AS IDENTITY,
mov_year NUMBER,
mov_name VARCHAR2(100),
director_id NUMBER
)
;

CREATE TABLE playedby(
movie_id NUMBER,
actor_id NUMBER
)
;

INSERT INTO directors (dir_name)
SELECT DISTINCT director dir_name
FROM films11
;

INSERT INTO movies (mov_year, mov_name, director_id)
SELECT year mov_year, movietitle mov_name, directors.id_director director_id
FROM films11
INNER JOIN directors ON directors.dir_name = films11.director

;

INSERT INTO actors (act_name)
SELECT DISTINCT t.actorname act_name
FROM (
SELECT YT.year, YT.movietitle,
REPLACE(REGEXP_SUBSTR(YT.actorname||',','.*?,',1,lvl.lvl),',','') AS actorname
FROM films11 YT
JOIN (SELECT level AS lvl
FROM dual
CONNECT BY level <= (SELECT MAX(REGEXP_COUNT(actorname,',')+1) FROM films11)
) lvl ON lvl.lvl <= REGEXP_COUNT(YT.actorname,',')+1
) t
;

INSERT INTO playedby (movie_id, actor_id)
SELECT movies.id_movie movie_id, actors.id_actor actor_id
FROM (
SELECT YT.year, YT.movietitle,
REPLACE(REGEXP_SUBSTR(YT.actorname||',','.*?,',1,lvl.lvl),',','') AS actorname
FROM films11 YT
JOIN (SELECT level AS lvl
FROM dual
CONNECT BY level <= (SELECT MAX(REGEXP_COUNT(actorname,',')+1) FROM films11)
) lvl ON lvl.lvl <= REGEXP_COUNT(YT.actorname,',')+1
) t
INNER JOIN actors ON t.actorname = actors.act_name
INNER JOIN movies ON t.year = movies.mov_year AND t.movietitle = movies.mov_name

;

After that you can just make a select like that :

Query 3:

SELECT mov_year, mov_name, dir_name, act_name 
FROM movies
INNER JOIN directors ON directors.id_director = movies.director_id
INNER JOIN playedby ON movies.id_movie = playedby.movie_id
INNER JOIN actors ON playedby.actor_id = actors.id_actor
WHERE act_name like '%act2%'
order by mov_year asc

Results:

| MOV_YEAR | MOV_NAME | DIR_NAME | ACT_NAME |
|----------|----------|----------|----------|
| 2000 | title1 | dir1 | act2 |
| 2001 | title2 | dir2 | act2 |

Expanding Oracle rows with comma-delimited values into multiple rows


Option 1: Simple, fast string functions and a recursive query:

with t (key, vals) as (
SELECT 'k1', 'a,b' FROM DUAL UNION ALL
SELECT 'k2', 'c,d,e' FROM DUAL
),
bounds (key, vals, spos, epos) AS (
SELECT key, vals, 1, INSTR(vals, ',', 1)
FROM t
UNION ALL
SELECT key, vals, epos + 1, INSTR(vals, ',', epos + 1)
FROM bounds
WHERE epos > 0
)
SEARCH DEPTH FIRST BY key SET key_order
SELECT key,
CASE epos
WHEN 0
THEN SUBSTR(vals, spos)
ELSE SUBSTR(vals, spos, epos - spos)
END AS val
FROM bounds;

Option 2: Slower regular expressions in a LATERAL joined hierarchical query

This option requires Oracle 12 or later.

with t (key, vals) as (
SELECT 'k1', 'a,b' FROM DUAL UNION ALL
SELECT 'k2', 'c,d,e' FROM DUAL
)
SELECT key, val
FROM t
LEFT OUTER JOIN LATERAL (
SELECT regexp_substr(vals, '[^,]+', 1, level) AS val
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(vals, '[^,]+')
)
ON (1 = 1)

Option 3: Recursive query correlating to parent rows.

This option is the slowest of the options as it needs to correlate between levels of the hierarchy and generate a GUID at each step (which is seemingly useless but prevents unnecessary recursion).

with t (key, vals) as (
SELECT 'k1', 'a,b' FROM DUAL UNION ALL
SELECT 'k2', 'c,d,e' FROM DUAL
)
SELECT key,
regexp_substr(vals, '[^,]+', 1, level) AS val
FROM t
CONNECT BY LEVEL <= REGEXP_COUNT(vals, '[^,]+')
AND PRIOR key = key
AND PRIOR SYS_GUID() IS NOT NULL;

Which all output:



Leave a reply



Submit