Convert Unknown Number of Comma Separated Varchars Within 1 Column into Multiple Columns

Convert unknown number of comma separated varchars within 1 column into multiple columns

I made one assumption while creating this answer, which is that you need this as a separate stored proc.

Step 1

Create a user-defined table type so that a table-valued parameter (TVP) can be passed into a stored procedure.

-- User-defined table type used to pass a set of delimited strings into a
-- stored procedure as a table-valued parameter (TVP).
USE [db_name]
GO
CREATE TYPE axisTable AS TABLE
(
    axis1 VARCHAR(MAX) -- the delimited string to be parsed, one per row
)
GO

Step 2

Create the procedure to parse out the values.

USE [db_name]
GO

SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

-- usp_util_parse_out_axis
--
-- Splits every comma-separated string in the table-valued parameter into one
-- output column per position (axis1, axis2, ...) and returns a single result
-- set with one row per input row. Rows with fewer values than the longest row
-- are padded with NULL.
--
-- Parameter:
--   @axis_tbl_prelim  axisTable (single column axis1); one delimited list per row.
--
-- Notes:
--   * The work table is a session-local #temp table rather than a global
--     ##temp table, so concurrent callers cannot drop or overwrite each
--     other's data. It is still visible to the dynamic SQL executed below
--     because that runs in a child scope of this procedure.
--   * No result set is returned when the input has no non-NULL rows.
CREATE PROCEDURE [dbo].[usp_util_parse_out_axis]
(
@axis_tbl_prelim axisTable readonly
)
AS
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;

--TVPs are readonly, so the data is copied into a local table variable that
--can be updated. The identity key is assigned once and then reused on every
--loop pass, so each parsed value is guaranteed to land in the row it came from.
--(The TVP carries no ordering column, so the row order is whatever order the
--rows are read in here.)
declare @axis_tbl table
(
row_num int identity(1, 1) not null primary key
, axis1 varchar(max) null
)

insert into @axis_tbl (axis1)
select p.axis1
from @axis_tbl_prelim as p

declare @comma_cnt int
, @i int
, @sql_dyn nvarchar(max)
, @col_list nvarchar(max)

--session-local work table holding one row per (input row, value position)
create table #axis_unpvt
(
axis_nbr varchar(25)
, row_num int
, axis_val varchar(max)
)

--getting the most commas found in any one row
--(datalength instead of len, because len ignores trailing spaces and would
--miscount a value such as 'a ,')
set @comma_cnt = (select max(datalength(a.axis1) - datalength(replace(a.axis1, ',', '')))
from @axis_tbl as a)

set @i = 1
while @i <= @comma_cnt + 1
begin --while loop

--insert the data into the "unpivot" table one parsed value at a time (all rows)
insert into #axis_unpvt (axis_nbr, row_num, axis_val)
select 'axis' + cast(@i as varchar(10))
, a.row_num
, case when a.axis1 is null or len(a.axis1) = 0 then NULL --nothing left in this row
when charindex(',', a.axis1) = 0 then ltrim(rtrim(a.axis1)) --last value in this row
else ltrim(rtrim(left(a.axis1, charindex(',', a.axis1) - 1))) --text before the first comma
end
from @axis_tbl as a

--getting rid of the value that was just inserted from the source table;
--substring from the character after the comma keeps the whole remainder
--even when the string ends in spaces
update a
set a.axis1 = case when charindex(',', a.axis1) > 0
then ltrim(rtrim(substring(a.axis1, charindex(',', a.axis1) + 1, datalength(a.axis1))))
else NULL
end
from @axis_tbl as a
where a.axis1 is not null

--building the column list in numeric order (axis1, axis2, ..., axis10)
--and quoting each name so it is always a valid identifier
set @col_list = coalesce(@col_list + ', ', '') + quotename('axis' + cast(@i as varchar(10)))

--incrementing toward terminating condition
set @i += 1

end --while loop

--building the pivot statement
set @sql_dyn = '
select '
+ @col_list +
'
from #axis_unpvt as a
pivot (max(axis_val)
for axis_nbr in ('
+ @col_list +
')) as p
order by p.row_num'

--executing the pivot statement (skipped when there was nothing to parse)
if @col_list is not null
exec(@sql_dyn);

END

Step 3

Make a procedure call using the data type created in Step 1 as the parameter.

USE [db_name]
GO

-- Load a few sample delimited strings into a table-valued variable...
DECLARE @tvp AS axisTable

INSERT INTO @tvp (axis1) VALUES ('296.90, 309.4')
INSERT INTO @tvp (axis1) VALUES ('296.32, 309.81')
INSERT INTO @tvp (axis1) VALUES ('296.90')
INSERT INTO @tvp (axis1) VALUES ('300.11, 309.81, 311, 313.89, 314.00, 314.01, V61.8, V62.3')

-- ...and hand them to the parsing procedure.
EXEC [db_name].[dbo].[usp_util_parse_out_axis] @axis_tbl_prelim = @tvp

Results from your example are as follows:

axisTblResults

Pandas split column into multiple columns by comma

In case someone else wants to split a single column (delimited by a value) into multiple columns, try this:

# Split every string on commas; expand=True spreads the pieces across columns.
series.str.split(pat=',', expand=True)

This answered the question I came here looking for.

Credit to EdChum's code that includes adding the split columns back to the dataframe.

# Keep column 0 as a one-column DataFrame, split column 1 on ', ' into
# separate columns, then place the two pieces side by side.
kept = df[[0]]
pieces = df[1].str.split(', ', expand=True)
pd.concat([kept, pieces], axis=1)

Note: The first argument, df[[0]], is a DataFrame.

The second argument df[1].str.split is the series that you want to split.

split Documentation

concat Documentation

Split a comma separated string of unknown elements to multiple columns in PostgreSQL 11.0

You can split it into an array, then access each array element:

-- Break col1 into an array once, then expose individual array positions
-- as separate columns.
with split_rows as (
    select
        col1,
        regexp_split_to_array(col1, '\s*;\s*') as elements
    from the_table
)
select
    col1,
    elements[1] as col2,
    elements[2] as col3
from split_rows

r split a string of data into multiple columns, sorted by individual variables

We can split each column with strsplit and then use mtabulate to get the frequency of each value

library(qdapTools)
# For each column: split every entry on commas, tabulate the resulting
# tokens, then bind the per-column tables side by side.
do.call(cbind, lapply(df, function(column) {
  tokens <- strsplit(column, ",")
  mtabulate(tokens)
}))
# indication.1 indication.2 indication.3 treatment.1 treatment.2 treatment.3
#1 1 1 0 0 0 1
#2 0 1 0 1 1 0
#3 1 0 1 0 1 1

R: Split Variable Column into multiple (unbalanced) columns by comma

From Ananda's splitstackshape package:

# Split the comma-separated Events column into Events_1, Events_2, ...
cSplit(indt = df, splitCols = "Events", sep = ",")
# Name Age Number First Events_1 Events_2 Events_3 Events_4
#1: Karen 24 8 0 Triathlon/IM Marathon 10k 5k
#2: Kurt 39 2 0 Half-Marathon 10k NA NA
#3: Leah 18 0 1 NA NA NA NA

Or with tidyr:

# Split Events on commas into four named columns; extra = "drop" discards
# any pieces beyond the fourth.
separate(
  data = df,
  col = 'Events',
  into = paste0("Events_", 1:4),
  sep = ",",
  extra = "drop"
)
# Name Age Number Events_1 Events_2 Events_3 Events_4 First
#1 Karen 24 8 Triathlon/IM Marathon 10k 5k 0
#2 Kurt 39 2 Half-Marathon 10k <NA> <NA> 0
#3 Leah 18 0 NA <NA> <NA> <NA> 1

With the data.table package:

# Convert df to a data.table by reference, add Events_1..Events_4 from the
# comma-split Events column, then show everything except the original column.
setDT(df)
df[, paste0("Events_", 1:4) := tstrsplit(Events, ",")]
df[, -"Events", with = FALSE]
# Name Age Number First Events_1 Events_2 Events_3 Events_4
#1: Karen 24 8 0 Triathlon/IM Marathon 10k 5k
#2: Kurt 39 2 0 Half-Marathon 10k NA NA
#3: Leah 18 0 1 NA NA NA NA

Data

# Example data as a structure() literal: a 3-row data.frame with columns
# Name, Age, Number, Events and First. Name and Events are stored as factors
# (integer codes plus .Label); note that some labels carry leading or
# trailing spaces (e.g. "Leah ", " NA").
df <- structure(list(Name = structure(1:3, .Label = c("Karen", "Kurt", 
"Leah "), class = "factor"), Age = c(24L, 39L, 18L), Number = c(8L,
2L, 0L), Events = structure(c(3L, 2L, 1L), .Label = c(" NA",
" Half-Marathon,10k", " Triathlon/IM,Marathon,10k,5k"
), class = "factor"), First = c(0L, 0L, 1L)), .Names = c("Name",
"Age", "Number", "Events", "First"), class = "data.frame", row.names = c(NA,
-3L))

Splitting a string column with unequal size into multiple columns using R

This is a good occasion to make use of the extra = 'merge' argument of separate:

library(dplyr)
library(tidyr)  # separate() is exported by tidyr, not dplyr

# Split str on ";" into columns A, B and C; extra = 'merge' keeps any
# surplus pieces inside the last column instead of dropping them.
df %>%
  separate(str, c('A', 'B', 'C'), sep = ";", extra = 'merge')
  no    A     B     C
1 1 M 12 M 13 <NA>
2 2 M 24 <NA> <NA>
3 3 <NA> <NA> <NA>
4 4 C 12 C 50 C 78

How to split a string column into two columns with a 'variable' delimiter?

Use Series.str.split with the regex \s+\.+\s+, which splits by 1+ spaces, 1+ periods, 1+ spaces:

# Example frame: a title and a name separated by a run of dots padded with spaces.
df = pd.DataFrame({'A': [
    'Mayor ............... Paul Jones',
    'Senator ................. Billy Twister',
    'Congress Rep. .......... Chris Rock',
    'Chief of Staff ....... Tony Allen',
]})

# Raw string so that \s and \. reach the regex engine unchanged instead of
# being treated as (invalid) Python string escapes.
# Pattern: 1+ whitespace, 1+ literal periods, 1+ whitespace. The single period
# in "Rep." is not followed by "space + periods + space", so it stays in the title.
df[['Title', 'Name']] = df['A'].str.split(r'\s+\.+\s+', expand=True)

# A Title Name
# 0 Mayor ............... Paul Jones Mayor Paul Jones
# 1 Senator ................. Billy Twister Senator Billy Twister
# 2 Congress Rep. .......... Chris Rock Congress Rep. Chris Rock
# 3 Chief of Staff ....... Tony Allen Chief of Staff Tony Allen

How to split a comma and colon separated column into respective columns in R?

In base R, it can be done with read.dcf

# Rewrite each "key:value,key:value" string as a DCF record (one "key:value"
# per line) and separate the records with a blank line.
dcf_text <- paste(gsub(",", "\n", df1$col1), collapse = "\n\n")

# Keep a handle on the text connection so it can be closed explicitly;
# an anonymous connection would be left open after the read.
con <- textConnection(dcf_text)
out <- tryCatch(
  type.convert(as.data.frame(read.dcf(con)), as.is = TRUE),
  finally = close(con)
)

-output

> out
name Age City
1 Michael 31 NYC
2 Michael 31 NYC

Or using tidyverse

library(dplyr)
library(tidyr)

df1 %>%
  # remember which source row each key:value pair came from
  mutate(rn = row_number()) %>%
  # one row per comma-separated "key:value" pair
  separate_rows(col1, sep = ",\\s*") %>%
  # split each pair into its key (col1) and its value (col2)
  separate(col = col1, into = c("col1", "col2"), sep = ":") %>%
  # turn the keys into column names, giving one output row per source row
  pivot_wider(names_from = col1, values_from = col2) %>%
  # drop the helper row id
  select(-rn)
# A tibble: 2 × 3
name Age City
<chr> <chr> <chr>
1 Michael 31 NYC
2 Michael 31 NYC

data

# Example data as a structure() literal: a 2-row data.frame with a single
# character column col1 holding comma-separated "key:value" pairs.
df1 <- structure(list(col1 = c("name:Michael,Age:31,City:NYC",
"name:Michael,Age:31,City:NYC"
)), class = "data.frame", row.names = c(NA, -2L))

Splitting columns containing comma separated string to new row values

If you need every pairwise combination of the values obtained by splitting on ",", use:

print (df)
variable val
0 'a','x','y' 10
1 'a','x','y','f' 80
2 's' 4

from itertools import combinations

# Strip the single quotes wrapped around every token.
df['variable'] = df['variable'].str.replace("'", "", regex=True)

# Tokenise each cell; a cell without a comma is paired with itself so that
# it still produces exactly one output row.
s = []
for cell in df['variable']:
    if ',' in cell:
        s.append(cell.split(','))
    else:
        s.append((cell, cell))

# One output row per 2-element combination of tokens, carrying the row's val.
L = []
for tokens, val in zip(s, df['val']):
    for pair in combinations(tokens, 2):
        L.append((*pair, val))

df = pd.DataFrame(L, columns=['variable 1','variable 2','val'])

print (df)
variable 1 variable 2 val
0 a x 10
1 a y 10
2 x y 10
3 a x 80
4 a y 80
5 a f 80
6 x y 80
7 x f 80
8 y f 80
9 s s 4


Related Topics



Leave a reply



Submit