Split a String Without Removing the Delimiter in Swift

Split a String without removing the delimiter in Swift

This method works on CollectionTypes, rather than Strings, but it should be easy enough to adapt:

extension CollectionType {
    /// Splits the collection in front of every element that satisfies `isSplit`.
    /// Each matching element becomes the first element of the slice that follows it.
    ///
    /// - Parameter isSplit: Predicate deciding whether an element starts a new slice.
    /// - Returns: The slices in order. The first slice is empty when the very first
    ///   element matches; the last slice always runs to `endIndex`.
    /// - Throws: Rethrows whatever `isSplit` throws.
    func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
        // Positions of all delimiter elements, in collection order.
        let splitPoints = try indices.filter { idx in try isSplit(self[idx]) }
        var sliceStart = startIndex
        var pieces = splitPoints.map { splitPoint -> SubSequence in
            let piece = self[sliceStart..<splitPoint]
            // The delimiter itself opens the next slice.
            sliceStart = splitPoint
            return piece
        }
        // Everything from the last delimiter (or the start) to the end.
        pieces.append(suffixFrom(sliceStart))
        return pieces
    }
}

extension CollectionType where Generator.Element : Equatable {
    /// Convenience overload that splits at every element equal to `splitter`.
    ///
    /// - Parameter splitter: The delimiter element.
    /// - Returns: The result of the predicate-based `splitAt` for an equality test
    ///   against `splitter`.
    func splitAt(splitter: Generator.Element) -> [SubSequence] {
        return splitAt { $0 == splitter }
    }
}

You could use it like this:

// Demo: cut a sentence in front of each punctuation mark.
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"

// The characters that act as delimiters.
let puncSet = Set("!.,:".characters)

sentence.characters
    .splitAt { puncSet.contains($0) }
    .map(String.init)

// ["Hello", ", my name is oisdk", ". This should split", ": but only at punctuation", "!"]

Or, this version, which uses a for-loop, and splits after the delimiter:

extension CollectionType {
    /// Splits the collection after every element that satisfies `isSplit`.
    /// Each matching element stays as the last element of its slice.
    ///
    /// - Parameter isSplit: Predicate deciding whether an element ends a slice.
    /// - Returns: The slices in order. A trailing remainder is appended only when
    ///   elements are left after the last delimiter.
    /// - Throws: Rethrows whatever `isSplit` throws.
    func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
        var sliceStart = startIndex
        var pieces: [SubSequence] = []
        for idx in indices {
            guard try isSplit(self[idx]) else { continue }
            // Closed range: the delimiter belongs to the slice it terminates.
            pieces.append(self[sliceStart...idx])
            sliceStart = idx.successor()
        }
        // Nothing left over when the last element was itself a delimiter.
        if sliceStart == endIndex { return pieces }
        pieces.append(suffixFrom(sliceStart))
        return pieces
    }
}


extension CollectionType where Generator.Element : Equatable {
    /// Splits at every element that compares equal to `splitter`, by forwarding an
    /// equality predicate to the closure-taking `splitAt`.
    ///
    /// - Parameter splitter: The delimiter element.
    /// - Returns: The slices produced by the closure-taking `splitAt`.
    func splitAt(splitter: Generator.Element) -> [SubSequence] {
        return splitAt { candidate in splitter == candidate }
    }
}

// Demo: cut a sentence right after each punctuation mark.
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"

// The characters that act as delimiters.
let puncSet = Set("!.,:".characters)

sentence.characters
    .splitAt { character in puncSet.contains(character) }
    .map(String.init)

// ["Hello,", " my name is oisdk.", " This should split:", " but only at punctuation!"]

Or, if you wanted to get the most Swift features into one function (defer, throws, a Protocol extension, an evil flatMap, guard, and Optionals):

extension CollectionType {
    /// Splits the collection after every element that satisfies `isSplit`, keeping
    /// the matching element at the end of its slice.
    ///
    /// - Parameter isSplit: Predicate deciding whether an element ends a slice.
    /// - Returns: The slices in order, plus a trailing remainder when elements are
    ///   left after the last delimiter.
    /// - Throws: Rethrows whatever `isSplit` throws.
    func splitAt(@noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
        var sliceStart = startIndex
        // Returning nil for non-delimiters makes flatMap drop those indices.
        var pieces: [SubSequence] = try indices.flatMap { idx -> SubSequence? in
            guard try isSplit(self[idx]) else { return nil }
            // `defer` advances the start only after the slice below has been built.
            defer { sliceStart = idx.successor() }
            return self[sliceStart...idx]
        }
        guard sliceStart != endIndex else { return pieces }
        pieces.append(suffixFrom(sliceStart))
        return pieces
    }
}

Split String into Array keeping delimiter/separator in Swift

Suppose you are splitting the string by a separator called separator, you can do the following:

// Split, put the separator back after every piece, then tidy up:
// the final separator is surplus, and empty pieces carry no content.
let result = yourString
    .components(separatedBy: separator)
    .flatMap { piece in [piece, separator] }
    .dropLast()
    .filter { !$0.isEmpty }

For example:

// Same pipeline applied to a concrete string, using a single space as the separator.
let result = " Hello World "
    .components(separatedBy: " ")
    .flatMap { piece in [piece, " "] }
    .dropLast()
    .filter { !$0.isEmpty }
print(result) // [" ", "Hello", " ", "World", " "]

In Swift, can you split a string by another string, not just a character?

import Foundation

// `components(separatedBy:)` accepts a whole string as the delimiter, not just one character.
let inputString = "This123Is123A123Test"
let delimiter = "123"
let splits: [String] = inputString.components(separatedBy: delimiter)

Split a String into an array in Swift?

The Swift way (in Swift 1.x) is to use the global split function, like so:

// Split the name at each space using the global `split` function.
var fullName = "First Last"
var fullNameArr = split(fullName) { character in character == " " }
// The first component is read unconditionally.
var firstName: String = fullNameArr[0]
// The last name stays nil when the input has only one component.
var lastName: String?
if fullNameArr.count > 1 {
    lastName = fullNameArr[1]
}

with Swift 2

In Swift 2 the use of split becomes a bit more complicated due to the introduction of the internal CharacterView type. This means that String no longer adopts the SequenceType or CollectionType protocols and you must instead use the .characters property to access a CharacterView type representation of a String instance. (Note: CharacterView does adopt SequenceType and CollectionType protocols).

let fullName = "First Last"
// Split the character view at each space, then turn every slice back into a String.
let fullNameArr = fullName.characters.split{$0 == " "}.map(String.init)
// or simply, passing the separator element instead of a predicate:
// let fullNameArr = fullName.characters.split(" ").map(String.init)

fullNameArr[0] // First
fullNameArr[1] // Last

How can I split a text into sentences?

The Natural Language Toolkit (nltk.org) has what you need. This group posting indicates this does it:

import nltk.data

# Load the English Punkt sentence tokenizer.
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# Read the whole file; the `with` block closes the handle even if reading fails.
with open("test.txt") as fp:
    data = fp.read()
# Print the sentences separated by a dashed line. The parenthesised form works
# as both a Python 2 print statement and a Python 3 function call.
print('\n-----\n'.join(tokenizer.tokenize(data)))

(I haven't tried it!)

How to split a string, but also keep the delimiters?

You can use lookahead and lookbehind, which are features of regular expressions.

System.out.println(Arrays.toString("a;b;c;d".split("(?<=;)")));
System.out.println(Arrays.toString("a;b;c;d".split("(?=;)")));
System.out.println(Arrays.toString("a;b;c;d".split("((?<=;)|(?=;))")));

And you will get:

[a;, b;, c;, d]
[a, ;b, ;c, ;d]
[a, ;, b, ;, c, ;, d]

The last one is what you want.

The pattern ((?<=;)|(?=;)) matches the empty string immediately before or after a ;, so the split happens on both sides of each delimiter.

EDIT: Fabian Steeg's comments on readability are valid. Readability is always a problem with regular expressions. One thing I do to make regular expressions more readable is to create a variable whose name describes what the regular expression does. You can even put placeholders (e.g. %1$s) in it and use Java's String.format to replace the placeholders with the actual string you need; for example:

/**
 * Regex template for splitting a string while keeping the delimiter.
 * The %1$s placeholder appears twice: once inside a lookbehind and once inside
 * a lookahead, so the pattern matches the empty position on either side of the
 * delimiter substituted in through String.format.
 * NOTE(review): the delimiter is inserted into the regex verbatim; a delimiter
 * containing regex metacharacters would presumably need escaping first. Confirm.
 */
static public final String WITH_DELIMITER = "((?<=%1$s)|(?=%1$s))";

/** Example use: fills the template with ";" and splits "a;b;c;d" with it. */
public void someMethod() {
// Substitute the delimiter into the template, then split on the resulting pattern.
final String[] aEach = "a;b;c;d".split(String.format(WITH_DELIMITER, ";"));
...
}

swift how to split string but contains the separators

If you don't know whether or not the string will contain spaces, you should probably use a Scanner

// Tokenize an arithmetic expression into numbers and operator runs.
let string = "100 + 8 - 9 + 10"

// Strip the spaces so the expression is one contiguous run of tokens.
let removed = string.replacingOccurrences(of: " ", with: "")
// "100+8-9+10"

let scanner = Scanner(string: removed)

// Characters that are collected as operator tokens.
let operators = CharacterSet(charactersIn: "+-*/")

var components: [String] = []

while !scanner.isAtEnd {
    // Tracks whether this pass consumed anything from the scanner.
    var madeProgress = false

    var value: Int = 0
    if scanner.scanInt(&value) {
        components.append("\(value)")
        madeProgress = true
    }

    // Bind the scanned run safely instead of force-unwrapping it.
    var op: NSString?
    if scanner.scanCharacters(from: operators, into: &op), let scannedOp = op {
        components.append(scannedOp as String)
        madeProgress = true
    }

    // Neither scan matched, so the next character is neither part of an integer
    // nor an operator. Stop here: the scanner would never advance and the loop
    // would spin forever.
    if !madeProgress { break }
}

print(components)

// ["100", "+", "8", "-", "9", "+", "10"]

How to split a string at the last occurrence of a sequence

The range(of:...) method of String has a .backwards option
to find the last occurrence of a string.
Then substring(to:) and substring(from:) can be used with the
lower/upper bound of that range to extract the parts of the string
preceding/following the separator:

/// Splits `string` at the last occurrence of "###" and parses the part after it as an integer.
///
/// - Parameter string: Text of the form `<prefix>###<integer>`.
/// - Returns: The text before the last "###" together with the parsed integer, or
///   `nil` when "###" does not occur or the text after it is not a valid `Int`.
func parseTuple(from string: String) -> (String, Int)? {
    // Search backwards so that only the final separator is used.
    guard let separatorRange = string.range(of: "###", options: .backwards) else {
        return nil
    }
    let numberText = string.substring(from: separatorRange.upperBound)
    guard let number = Int(numberText) else {
        return nil
    }
    let prefixText = string.substring(to: separatorRange.lowerBound)
    return (prefixText, number)
}

Example:

// Only the last "###" acts as the separator; earlier ones stay in the text part.
if case let parsed? = parseTuple(from: "Connect###Four###Player###7") {
    print(parsed)
    // ("Connect###Four###Player", 7)
}

Swift 4 update:

/// Splits `string` at the last occurrence of "###" and parses the part after it as an integer.
///
/// - Parameter string: Text of the form `<prefix>###<integer>`.
/// - Returns: The text before the last "###" together with the parsed integer, or
///   `nil` when "###" does not occur or the text after it is not a valid `Int`.
func parseTuple(from string: String) -> (String, Int)? {
    // Search backwards so that only the final separator is used.
    guard let separatorRange = string.range(of: "###", options: .backwards),
          let number = Int(string[separatorRange.upperBound...]) else {
        return nil
    }
    // Half-open range: stop before the separator. A closed range (`...`) would
    // also include the separator's first "#" in the returned text.
    return (String(string[..<separatorRange.lowerBound]), number)
}


Related Topics



Leave a reply



Submit