Splitting Columns by Number of Characters

Split string column based on number of characters

df['columnB']=df['columnA'].str.slice(stop=3)
df['columnA']=df['columnA'].str.slice(start=3)

Splitting a pandas column every n characters

The answer is quite simple: insert a delimiter after every fixed-size chunk of characters, then split on that delimiter.

For example, use | as the delimiter, a chunk size of 10 characters, and at most n = 4 splits:

series = pd.Series(['This is an even longer string', 'This is the longest string of them all'],name='str1')
name = series.name
cols = series.str.replace('(.{10})', r'\1|').str.split('|', n=4, expand=True).add_prefix(f'{name}_')

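That is, use str.replace to add the delimiter, str.split to split the pieces apart, and add_prefix to add the prefixes. (In pandas 2.0 and later, also pass regex=True to str.replace, because its default changed to literal matching.)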

The output will be:

    str1_0      str1_1      str1_2      str1_3
0   This is an  even long   er string   None
1   This is th  e longest   string of   them all

The reason why str.split('.{10}') doesn't work is that the pat param of str.split is a pattern that matches the split delimiters, not the strings that should appear in the split results. Therefore, with str.split('.{10}'), every run of 10 characters is consumed as a delimiter, and you only get what is left between those runs (empty strings and the trailing remainder).

UPDATE: According to the suggestion from @AKX, use \x1F (the ASCII unit separator) as a better delimiter:

cols = series.str.replace('(.{10})', '\\1\x1F').str.split('\x1F', n=4, expand=True).add_prefix(f'{name}_')

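Note the absence of the r string prefix: the replacement must be a regular (non-raw) string so that \x1F is interpreted as the control character, which is why the backreference is written as \\1.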

Split data frame column by number of characters specified in another column

A simple solution would be:

dat <- dat %>% mutate(x1 = substring(x, 1, y),
                      x2 = substring(x, y + 1, nchar(x)))

Split by character quantity, create new columns with substrings Python

You can use:

N = 4

# Custom function to split string
split_string = lambda x: pd.Series([x[i:i+N] for i in range(0, len(x), N)])

new_var = df['var2'].apply(split_string).fillna('')
new_var.columns = 'new_var' + (new_var.columns + 1).astype(str).str.zfill(3)

df = df.join(new_var)

Output:

var1	var2	new_var001	new_var002	new_var003	new_var004	new_var005
1	abcdefghi	abcd	efgh	i
2	abcdefghijklmnop	abcd	efgh	ijkl	mnop
3	abc	abc
4	abcdefghijklmnopqrst	abcd	efgh	ijkl	mnop	qrst

Split a string by number of characters in a column of a data frame to create multiple columns in R?

We can use separate

library(tidyr)
separate(df, ID, into = c("Spl_1", "Spl_2"), sep = 4, remove = FALSE)
#           ID Spl_1  Spl_2 Var1 Var2
#1  0334KLM001  0334 KLM001   aa   xx
#2  1334HDM002  1334 HDM002  zvv   rr
#3  2334WEM003  2334 WEM003 qetr  qwe
#4  3334OKT004  3334 OKT004   ff  sdf
#5  4334WER005  4334 WER005   ee  sdf
#6  5334BBC006  5334 BBC006  qly  ssg
#7  6334QQQ007  6334 QQQ007   kk  htj
#8  7334AAA008  7334 AAA008   uu  yjy
#9  8334CBU009  8334 CBU009   ww wttt
#10 9334MLO010  9334 MLO010   aa   dg

If we want 3 columns, we can pass a vector in sep

separate(df, ID, into = c("Spl_1", "Spl_2", "Spl_3"), sep = c(4,8), remove = FALSE)
#           ID Spl_1 Spl_2 Spl_3 Var1 Var2
#1  0334KLM001  0334  KLM0    01   aa   xx
#2  1334HDM002  1334  HDM0    02  zvv   rr
#3  2334WEM003  2334  WEM0    03 qetr  qwe
#4  3334OKT004  3334  OKT0    04   ff  sdf
#5  4334WER005  4334  WER0    05   ee  sdf
#6  5334BBC006  5334  BBC0    06  qly  ssg
#7  6334QQQ007  6334  QQQ0    07   kk  htj
#8  7334AAA008  7334  AAA0    08   uu  yjy
#9  8334CBU009  8334  CBU0    09   ww wttt
#10 9334MLO010  9334  MLO0    10   aa   dg

If the numbers at the beginning are not of fixed length, use extract

extract(df, ID, into = c("Spl_1", "Spl_2"), "^([0-9]+)(.*)", remove = FALSE)

and for 3 columns,

extract(df, ID, into = c("Spl_1", "Spl_2", "Spl_3"), "(.{4})(.{4})(.*)", remove = FALSE)

Split column value into separate columns based on length

siddesh, although this question lacks almost everything needed to answer it, I want to point some things out and help you (as you are an inexperienced SO user):

First I set up a minimal reproducible example. Next time, this is on you.

I'll start with a declared table with some rows inserted.

We on SO can copy'n'paste this into our environment which makes it easy to answer.

DECLARE @tbl TABLE(ID INT IDENTITY, YourCSVString VARCHAR(MAX));
INSERT INTO @tbl VALUES('1 this is long text, 2 some second fragment, 3 third fragment, 4 adfjksdahfljsadhfjhadlfhasdjks alsdjfsadhf k, 5 halksjfh asdkf ')
                      ,('1 this is other long text, 2 some second fragment to show that this works with tabular data, 3 again a third fragment, 4 adfjksdahfljsadhfjhadlfhasdjks alsdjfsadhf k, 5 halksjfh asdkf ');

--This is, what you actually need:

SELECT fkID = t.ID
      ,B.fragmentPosition
      ,B.fragmentContent 
      ,C.framgentLength
FROM @tbl t
CROSS APPLY OPENJSON(CONCAT(N'["',REPLACE(t.YourCSVString,N',','","'),'"]')) A
CROSS APPLY(VALUES(A.[key],TRIM(A.[value]))) B(fragmentPosition,fragmentContent)
CROSS APPLY(VALUES(LEN(B.fragmentContent))) C(framgentLength);

The result should be stored within a physical table, where the fkID points to the ID of the original row and the fragmentPosition stores the order. fkID and fragmentPosition should be a combined unique key.

If you really want to do what you are suggesting in your question (not recommended!), you can try something along these lines:

DECLARE @maxPerColumn INT=75;  --You can set the portion's max size, in your case 2000.

WITH cte AS
(
    SELECT fkID = t.ID
          ,B.fragmentPosition
          ,B.fragmentContent 
          ,C.framgentLength
    FROM @tbl t
    CROSS APPLY OPENJSON(CONCAT(N'["',REPLACE(t.YourCSVString,N',','","'),'"]')) A
    CROSS APPLY(VALUES(A.[key],TRIM(A.[value]))) B(fragmentPosition,fragmentContent)
    CROSS APPLY(VALUES(LEN(B.fragmentContent))) C(framgentLength)
)
,recCTE AS
(
    SELECT * 
          ,countPerColumn = 1
          ,columnCounter = 1
          ,sumLength = LEN(fragmentContent)
          ,growingString = CAST(fragmentContent AS NVARCHAR(MAX)) 
    FROM cte WHERE fragmentPosition=0

    UNION ALL
    SELECT r.fkID
          ,cte.fragmentPosition
          ,cte.fragmentContent
          ,cte.framgentLength
          ,CASE WHEN A.newSumLength>@maxPerColumn THEN 1 ELSE r.countPerColumn + 1 END
          ,r.columnCounter + CASE WHEN A.newSumLength>@maxPerColumn THEN 1 ELSE 0 END
          ,CASE WHEN A.newSumLength>@maxPerColumn THEN LEN(cte.fragmentContent) ELSE newSumLength END
          ,CASE WHEN A.newSumLength>@maxPerColumn THEN cte.fragmentContent ELSE CONCAT(r.growingString,N', ',cte.fragmentContent) END
    FROM cte
    INNER JOIN recCTE r ON r.fkID=cte.fkID AND r.fragmentPosition+1=cte.fragmentPosition 
    CROSS APPLY(VALUES(r.sumLength+LEN(cte.fragmentContent))) A(newSumLength)
)
SELECT TOP 1 WITH TIES 
       fkID
      ,growingString
      ,LEN(growingString)
FROM recCTE 
ORDER BY ROW_NUMBER() OVER(PARTITION BY fkID,columnCounter ORDER BY countPerColumn DESC );

The result

fkID    pos Content
1       2   1 this is long text, 2 some second fragment, 3 third fragment
1       4   4 adfjksdahfljsadhfjhadlfhasdjks alsdjfsadhf k, 5 halksjfh asdkf
2       0   1 this is other long text
2       1   2 some second fragment to show that this works with tabular data
2       3   3 again a third fragment, 4 adfjksdahfljsadhfjhadlfhasdjks alsdjfsadhf k
2       4   5 halksjfh asdkf

The idea in short:

The first cte does the splitting (as above)
The recursive cte will iterate down the string and do the magic.
The final SELECT uses a hack with TOP 1 WITH TIES together with an ORDER BY ROW_NUMBER() OVER(...). This will return the highest intermediate result only.

Hint: Don't do this...

UPDATE

Just for fun:

You can replace the final SELECT with this

,getPortions AS
(
    SELECT TOP 1 WITH TIES 
           fkID
          ,fragmentPosition
          ,growingString
          ,LEN(growingString) portionLength
    FROM recCTE 
    ORDER BY ROW_NUMBER() OVER(PARTITION BY fkID,columnCounter ORDER BY countPerColumn DESC )
)
SELECT p.*
FROM
(
    SELECT fkID
          ,CONCAT(N'col',ROW_NUMBER() OVER(PARTITION BY fkID ORDER BY fragmentPosition)) AS ColumnName
          ,growingString
    FROM getPortions
) t
PIVOT(MAX(growingString) FOR ColumnName IN(col1,col2,col3,col4,col5)) p;

This will return exactly what you are asking for.

But - as said before - this is against all rules of best practice...

Split string every n characters new column

Alternatively, you can try read.fwf in base R. No special package is needed:

tmp <- read.fwf(
    textConnection(dtf$var2),
    widths = rep(4, ceiling(max(nchar(dtf$var2) / 4))),
    stringsAsFactors = FALSE)

cbind(dtf, tmp)

#   var1                 var2   V1   V2   V3   V4   V5
# 1    1            abcdefghi abcd efgh    i <NA> <NA>
# 2    2     abcdefghijklmnop abcd efgh ijkl mnop <NA>
# 3    3                  abc  abc <NA> <NA> <NA> <NA>
# 4    4 abcdefghijklmnopqrst abcd efgh ijkl mnop qrst

Split R string into individual characters

You could use

data.frame(Reduce(rbind, strsplit(df$V1, "")))

This returns

     X1 X2 X3 X4 X5 X6
init  g  g  g  g  c  c
X     c  c  c  c  t  t
X.1   t  t  t  t  t  t
X.2   a  a  a  a  a  a

data.frame(do.call(rbind, strsplit(df$V1, "")))

This alternative, which uses do.call instead of Reduce, returns the same values with plain numeric row names:

  X1 X2 X3 X4 X5 X6
1  g  g  g  g  c  c
2  c  c  c  c  t  t
3  t  t  t  t  t  t
4  a  a  a  a  a  a