Swift 2.1 [UInt8] --UTF8--> String?

let buffUInt8: Array<UInt8> = [97, 98, 115, 100, 114, 102, 103, 104, 0]

// String.fromCString expects null-terminated CChar (Int8) input,
// so convert the UInt8 bytes first
let buffInt8 = buffUInt8.map { Int8(bitPattern: $0) }
let str = String.fromCString(buffInt8) // "absdrfgh"

Alternatively, you can use

String.fromCStringRepairingIllFormedUTF8(cs: UnsafePointer<CChar>) -> (String?, hadError: Bool)
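On later Swift versions (3 and up, assuming Foundation is imported where String.Encoding is used), the detour through Int8 is unnecessary; a sketch of the modern equivalents, with the trailing 0 terminator dropped first:

let bytes: [UInt8] = [97, 98, 115, 100, 114, 102, 103, 104]

// failable: nil if the bytes are not valid UTF-8
let str1 = String(bytes: bytes, encoding: .utf8) // Optional("absdrfgh")

// non-failable: repairs ill-formed sequences with U+FFFD
let str2 = String(decoding: bytes, as: UTF8.self) // "absdrfgh"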

Convert UInt8 to character UTF8

You're converting each byte to a string separately, but UTF-8 is a multi-byte encoding: only code points 0-127 (0x00-0x7F), the old ASCII range (digits, basic Latin letters, etc.), fit in a single byte. All other characters are encoded using 2-4 bytes.

So the UInt8 sequence should be converted as a whole, using code like this:

let data = Data(bytes: array)                    // wrap the bytes in Data
let string = String(data: data, encoding: .utf8) // nil if not valid UTF-8
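For example, the Euro sign is one Character but three bytes in UTF-8, so the bytes must be decoded as one sequence (a small illustration, assuming Swift 3+ with Foundation):

let array: [UInt8] = [0xE2, 0x82, 0xAC] // UTF-8 encoding of "€" (U+20AC)
let data = Data(bytes: array)
let string = String(data: data, encoding: .utf8) // Optional("€")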

How to convert Character to UInt8 in Swift

You will need to go via the String representation of the Characters to convert to UInt8. You needn't, however, explicitly initialize an array in your [Character] -> [UInt8] conversion; since String.UTF8View (from String.utf8) is a CollectionType, you can apply a map operation on the String.UTF8View itself, with a UInt8 initialization. I.e.,

let charArray: [Character] = [ "S", "t", "r", "i", "n", "g"]
let asUInt8Array = String(charArray).utf8.map{ UInt8($0) }

print(asUInt8Array)
/* [83, 116, 114, 105, 110, 103] */

print(asUInt8Array.dynamicType)
/* Array<UInt8> */

With regard to your comment below ("frustrated over the abstraction of Swift, as compared to the simple ways of Objective-C"): if you believe the above to be messy, you could include it in an extension to SequenceType constrained to Character elements, allowing easier use in practice. E.g.:

extension SequenceType where Generator.Element == Character {

    /* extension accessible as function */
    func asByteArray() -> [UInt8] {
        return String(self).utf8.map { UInt8($0) }
    }

    /* or, as @LeoDabus pointed out below (thanks!),
       use a computed property for this simple case */
    var byteArray: [UInt8] {
        return String(self).utf8.map { UInt8($0) }
    }
}

Usage example:

let charArray: [Character] = [ "S", "t", "r", "i", "n", "g"]

/* use extension function */
let asUInt8Array = charArray.asByteArray()

/* or computed property */
let asUInt8Array2 = charArray.byteArray

How to cast decrypted UInt8 to String?

You'll want Foundation to decode the UTF8 for you since there's no way to generate a String.UTF8View directly. So convert to NSData first.

let decrypted: [UInt8] = [0x48, 0x65, 0x6c, 0x6c, 0x6f]
let data = NSData(bytes: decrypted, length: decrypted.count)
let str = String(data: data, encoding: NSUTF8StringEncoding)

If you want to do it without Foundation, you can, but it's a little work. You have to manage the decoding yourself.

extension String {
    init?(utf8Bytes: [UInt8]) {
        var decoder = UTF8()
        var g = utf8Bytes.generate()
        var characters: [Character] = []
        LOOP:
        while true {
            let result = decoder.decode(&g)
            switch result {
            case .Result(let scalar): characters.append(Character(scalar))
            case .EmptyInput: break LOOP
            case .Error: return nil
            }
        }
        self.init(characters)
    }
}

let unicode = String(utf8Bytes: decrypted) // "Hello"

(I'm very surprised that this isn't built into Swift stdlib since it's so common and can be quickly built out of other parts of Swift stdlib. Often when that's the case, there's a reason that I'm just not aware of yet, so there may be some subtle problem with my approach here.)

Decoding strings including utf8-literals like '\xc3\xa6' in Swift?

To start with add the decoding code into a String extension as a computed property (or create a function)

extension String {
    var decodeUTF8: String {
        let bytes = self.components(separatedBy: "\\x")
            .dropFirst()
            .compactMap { UInt8($0, radix: 16) }
        return String(bytes: bytes, encoding: .utf8) ?? self
    }
}

Then use a regular expression match in a while loop to replace all matching values:

while let range = string.range(of: #"(\\x[a-f0-9]{2}){2}"#, options: [.regularExpression, .caseInsensitive]) {
    string.replaceSubrange(range, with: String(string[range]).decodeUTF8)
}
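A usage sketch (the input literal is made up for illustration):

var string = #"This is the \xc3\xa6 letter"#

while let range = string.range(of: #"(\\x[a-f0-9]{2}){2}"#, options: [.regularExpression, .caseInsensitive]) {
    string.replaceSubrange(range, with: String(string[range]).decodeUTF8)
}

print(string) // This is the æ letter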

Is there a way to create a String from utf16 array in swift?

Update for Swift 2.1:

You can create a String from an array of UTF-16 characters
with the

public init(utf16CodeUnits: UnsafePointer<unichar>, count: Int)

initializer. Example:

let str = "H€llo br>
// String to UTF16 array:
let utf16array = Array(str.utf16)
print(utf16array)
// Output: [72, 8364, 108, 108, 111, 32, 55357, 56836]

// UTF16 array to string:
let str2 = String(utf16CodeUnits: utf16array, count: utf16array.count)
print(str2)
// H€llo 😄

Previous answer:

There is nothing "built-in" (as far as I know), but you can use the UTF16 struct
which provides a decode() method:

extension String {

    init?(utf16chars: [UInt16]) {
        var str = ""
        var generator = utf16chars.generate()
        var utf16 = UTF16()
        var done = false
        while !done {
            let r = utf16.decode(&generator)
            switch r {
            case .EmptyInput:
                done = true
            case let .Result(val):
                str.append(Character(val))
            case .Error:
                return nil
            }
        }
        self = str
    }
}

Example:

let str = "H€llo br>
// String to UTF16 array:
let utf16array = Array(str.utf16)
print(utf16array)
// Output: [72, 8364, 108, 108, 111, 32, 55357, 56836]

// UTF16 array to string:
if let str2 = String(utf16chars: utf16array) {
    print(str2)
    // Output: H€llo 😄
}

Slightly more generic, you could define a method that creates a string from an array (or any sequence) of code units, using a given codec:

extension String {
    init?<S : SequenceType, C : UnicodeCodecType where S.Generator.Element == C.CodeUnit>
        (codeUnits: S, var codec: C) {
        var str = ""
        var generator = codeUnits.generate()
        var done = false
        while !done {
            let r = codec.decode(&generator)
            switch r {
            case .EmptyInput:
                done = true
            case let .Result(val):
                str.append(Character(val))
            case .Error:
                return nil
            }
        }
        self = str
    }
}

Then the conversion from UTF16 is done as

if let str2a = String(codeUnits: utf16array, codec: UTF16()) {
    print(str2a)
}

Here is another possible solution. While the previous methods are "pure Swift", this one uses the Foundation framework and the automatic
bridging between NSString and Swift String:

extension String {

    init?(utf16chars: [UInt16]) {
        let data = NSData(bytes: utf16chars, length: utf16chars.count * sizeof(UInt16))
        if let ns = NSString(data: data, encoding: NSUTF16LittleEndianStringEncoding) {
            self = ns as String
        } else {
            return nil
        }
    }
}
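Usage is identical to the pure Swift versions above:

if let str3 = String(utf16chars: utf16array) {
    print(str3) // H€llo 😄
}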

Swift Cannot Assign to immutable value of type NSData

update: Xcode 7.2 • Swift 2.1.1

let myTestString = "Hello World"

// converting from string to NSData
if let myStringData = myTestString.dataUsingEncoding(NSUTF8StringEncoding) {

// converting from NSData to String
let myStringFromData = String(data: myStringData, encoding: NSUTF8StringEncoding) ?? "Hello World"
}
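On Swift 3 and later the same round trip reads (a sketch with the renamed Data/String APIs, assuming Foundation is imported):

let myTestString = "Hello World"

// converting from String to Data
if let myStringData = myTestString.data(using: .utf8) {

    // converting from Data back to String
    let myStringFromData = String(data: myStringData, encoding: .utf8) ?? "Hello World"
}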

round trip Swift number types to/from Data

Note: The code has been updated for Swift 5 (Xcode 10.2). (Swift 3 and Swift 4.2 versions can be found in the edit history.) Also, possibly unaligned data is now handled correctly.

How to create Data from a value

As of Swift 4.2, data can be created from a value simply with

let value = 42.13
let data = withUnsafeBytes(of: value) { Data($0) }

print(data as NSData) // <713d0ad7 a3104540>

Explanation:

  • withUnsafeBytes(of: value)
    invokes the closure with a buffer pointer covering the raw bytes of the value.
  • A raw buffer pointer is a sequence of bytes, therefore Data($0) can be used to create the data.

How to retrieve a value from Data

As of Swift 5, the withUnsafeBytes(_:) method of Data invokes the closure with an “untyped” UnsafeRawBufferPointer to the bytes. The load(fromByteOffset:as:) method then reads the value from the memory:

let data = Data([0x71, 0x3d, 0x0a, 0xd7, 0xa3, 0x10, 0x45, 0x40])
let value = data.withUnsafeBytes {
    $0.load(as: Double.self)
}
print(value) // 42.13

There is one problem with this approach: It requires that the memory is properly aligned for the type (here: aligned to an 8-byte address). But that is not guaranteed, e.g. if the data was obtained as a slice of another Data value.

It is therefore safer to copy the bytes to the value:

let data = Data([0x71, 0x3d, 0x0a, 0xd7, 0xa3, 0x10, 0x45, 0x40])
var value = 0.0
let bytesCopied = withUnsafeMutableBytes(of: &value) { data.copyBytes(to: $0) }
assert(bytesCopied == MemoryLayout.size(ofValue: value))
print(value) // 42.13

Explanation:

  • withUnsafeMutableBytes(of:_:) invokes the closure with a mutable buffer pointer covering the raw bytes of the value.
  • The copyBytes(to:) method of DataProtocol (to which Data conforms) copies bytes from the data to that buffer.

The return value of copyBytes() is the number of bytes copied. It is equal to the size of the destination buffer, or less if the data does not contain enough bytes.
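That check matters when the data might be truncated. For example (reusing the Double layout from above):

let truncated = Data([0x71, 0x3d, 0x0a]) // only 3 of the 8 bytes of a Double
var value = 0.0
let bytesCopied = withUnsafeMutableBytes(of: &value) { truncated.copyBytes(to: $0) }
print(bytesCopied) // 3 – less than MemoryLayout<Double>.size, so `value` was not fully written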

Generic solution #1

The above conversions can now easily be implemented as generic methods of struct Data:

extension Data {

    init<T>(from value: T) {
        self = Swift.withUnsafeBytes(of: value) { Data($0) }
    }

    func to<T>(type: T.Type) -> T? where T: ExpressibleByIntegerLiteral {
        var value: T = 0
        guard count >= MemoryLayout.size(ofValue: value) else { return nil }
        _ = Swift.withUnsafeMutableBytes(of: &value) { copyBytes(to: $0) }
        return value
    }
}

The constraint T: ExpressibleByIntegerLiteral is added here so that we can easily initialize the value to “zero” – that is not really a restriction, because this method can be used with “trivial” (integer and floating point) types anyway; see below.

Example:

let value = 42.13 // implicit Double
let data = Data(from: value)
print(data as NSData) // <713d0ad7 a3104540>

if let roundtrip = data.to(type: Double.self) {
    print(roundtrip) // 42.13
} else {
    print("not enough data")
}

Similarly, you can convert arrays to Data and back:

extension Data {

    init<T>(fromArray values: [T]) {
        self = values.withUnsafeBytes { Data($0) }
    }

    func toArray<T>(type: T.Type) -> [T] where T: ExpressibleByIntegerLiteral {
        var array = Array<T>(repeating: 0, count: self.count / MemoryLayout<T>.stride)
        _ = array.withUnsafeMutableBytes { copyBytes(to: $0) }
        return array
    }
}

Example:

let value: [Int16] = [1, Int16.max, Int16.min]
let data = Data(fromArray: value)
print(data as NSData) // <0100ff7f 0080>

let roundtrip = data.toArray(type: Int16.self)
print(roundtrip) // [1, 32767, -32768]

Generic solution #2

The above approach has one disadvantage: It actually works only with "trivial"
types like integers and floating point types. "Complex" types like Array
and String have (hidden) pointers to the underlying storage and cannot be
passed around by just copying the struct itself. It also would not work with
reference types which are just pointers to the real object storage.

To solve that problem, one can

  • Define a protocol which defines the methods for converting to Data and back:

    protocol DataConvertible {
        init?(data: Data)
        var data: Data { get }
    }
  • Implement the conversions as default methods in a protocol extension:

    extension DataConvertible where Self: ExpressibleByIntegerLiteral {

        init?(data: Data) {
            var value: Self = 0
            guard data.count == MemoryLayout.size(ofValue: value) else { return nil }
            _ = withUnsafeMutableBytes(of: &value) { data.copyBytes(to: $0) }
            self = value
        }

        var data: Data {
            return withUnsafeBytes(of: self) { Data($0) }
        }
    }

    I have chosen a failable initializer here which checks that the number of bytes provided
    matches the size of the type.

  • And finally declare conformance to all types which can safely be converted to Data and back:

    extension Int : DataConvertible { }
    extension Float : DataConvertible { }
    extension Double : DataConvertible { }
    // add more types here ...

This makes the conversion even more elegant:

let value = 42.13
let data = value.data
print(data as NSData) // <713d0ad7 a3104540>

if let roundtrip = Double(data: data) {
    print(roundtrip) // 42.13
}

The advantage of the second approach is that you cannot inadvertently do unsafe conversions. The disadvantage is that you have to list all "safe" types explicitly.
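To illustrate (a sketch): the first approach happily accepts a non-trivial type, while the second rejects it at compile time:

// compiles, but copies only the Array struct itself (its internal storage pointer),
// not the elements:
let bogus = Data(from: [1, 2, 3])

// does not compile unless Array is explicitly declared DataConvertible:
// let data = [1, 2, 3].data // error: value of type '[Int]' has no member 'data'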

You could also implement the protocol for other types which require a non-trivial conversion, such as:

extension String: DataConvertible {

    init?(data: Data) {
        self.init(data: data, encoding: .utf8)
    }

    var data: Data {
        // Note: a conversion to UTF-8 cannot fail.
        return Data(self.utf8)
    }
}

or implement the conversion methods in your own types to do whatever is
necessary to serialize and deserialize a value.
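For example, a hypothetical Point struct could serialize its two fields back to back (a sketch building on the Double conformance above):

struct Point: DataConvertible {
    var x: Double
    var y: Double

    init?(data: Data) {
        let size = MemoryLayout<Double>.size
        guard data.count == 2 * size,
              let x = Double(data: data.prefix(size)),
              let y = Double(data: data.suffix(size)) else { return nil }
        self.x = x
        self.y = y
    }

    var data: Data {
        // concatenate the raw bytes of both coordinates
        return x.data + y.data
    }
}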

Byte order

No byte order conversion is done in the above methods; the data is always in
the host byte order. For a platform-independent representation (e.g.
“big endian”, aka “network”, byte order), use the corresponding integer
properties and initializers. For example:

let value = 1000
let data = value.bigEndian.data
print(data as NSData) // <00000000 000003e8>

if let roundtrip = Int(data: data) {
    print(Int(bigEndian: roundtrip)) // 1000
}

Of course, this conversion could also be done generically, in the generic
conversion methods.
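A sketch of such a variant, constrained to FixedWidthInteger so that bigEndian is available (the method names are made up for illustration):

extension Data {

    init<T: FixedWidthInteger>(bigEndianFrom value: T) {
        self = Swift.withUnsafeBytes(of: value.bigEndian) { Data($0) }
    }

    func toInteger<T: FixedWidthInteger>(bigEndian type: T.Type) -> T? {
        var value: T = 0
        guard count >= MemoryLayout<T>.size else { return nil }
        _ = Swift.withUnsafeMutableBytes(of: &value) { copyBytes(to: $0) }
        return T(bigEndian: value)
    }
}

let data2 = Data(bigEndianFrom: 1000)
print(data2 as NSData) // <00000000 000003e8>

if let roundtrip = data2.toInteger(bigEndian: Int.self) {
    print(roundtrip) // 1000
}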


