From d13c124df93489e33cecfa2e96a8e56f99f73963 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Thu, 30 Aug 2018 00:36:13 +0200 Subject: [PATCH] Implement casting from (Unicode) integer to string --- compiler.go | 16 +++++++++++++++- src/runtime/string.go | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/compiler.go b/compiler.go index 540e800c..69de3de1 100644 --- a/compiler.go +++ b/compiler.go @@ -2067,6 +2067,20 @@ func (c *Compiler) parseConvert(typeFrom, typeTo types.Type, value llvm.Value) ( if typeTo.Kind() == types.String { switch typeFrom := typeFrom.Underlying().(type) { + case *types.Basic: + // Assume a Unicode code point, as that is the only possible + // value here. + // Cast to an i32 value as expected by + // runtime.stringFromUnicode. + if sizeFrom > 4 { + value = c.builder.CreateTrunc(value, llvm.Int32Type(), "") + } else if sizeFrom < 4 && typeTo.Info()&types.IsUnsigned != 0 { + value = c.builder.CreateZExt(value, llvm.Int32Type(), "") + } else if sizeFrom < 4 { + value = c.builder.CreateSExt(value, llvm.Int32Type(), "") + } + fn := c.mod.NamedFunction("runtime.stringFromUnicode") + return c.builder.CreateCall(fn, []llvm.Value{value}, ""), nil case *types.Slice: switch typeFrom.Elem().(*types.Basic).Kind() { case types.Byte: @@ -2082,7 +2096,7 @@ func (c *Compiler) parseConvert(typeFrom, typeTo types.Type, value llvm.Value) ( typeFrom := typeFrom.Underlying().(*types.Basic) sizeTo := c.targetData.TypeAllocSize(llvmTypeTo) - if typeFrom.Info() & types.IsInteger != 0 && typeTo.Info() & types.IsInteger != 0 { + if typeFrom.Info()&types.IsInteger != 0 && typeTo.Info()&types.IsInteger != 0 { // Conversion between two integers. 
if sizeFrom > sizeTo { return c.builder.CreateTrunc(value, llvmTypeTo, ""), nil
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 52a1c89b..6b56aeb1 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -48,3 +48,51 @@ func stringFromBytes(x []byte) _string {
 	}
 	return _string{lenType(len(x)), (*byte)(buf)}
 }
+
+// stringFromUnicode creates a string from a Unicode code point. The string
+// holds the bytes and length produced by encodeUTF8, so values that are not
+// valid code points result in the replacement character U+FFFD.
+func stringFromUnicode(x rune) _string {
+	array, length := encodeUTF8(x)
+	// Array will be heap allocated.
+	// The heap most likely doesn't work with blocks below 4 bytes, so there's
+	// no point in allocating a smaller buffer for the string here.
+	return _string{length, (*byte)(unsafe.Pointer(&array))}
+}
+
+// encodeUTF8 converts a Unicode code point into its UTF-8 encoding. It returns
+// the encoded bytes in a fixed-size array (unused trailing bytes are zero)
+// together with the number of bytes used (1 to 4).
+//
+// Anything that is not a Unicode scalar value (a negative number, a surrogate
+// half in the range U+D800..U+DFFF, or a value above U+10FFFF) is encoded as
+// the replacement character U+FFFD.
+func encodeUTF8(x rune) ([4]byte, lenType) {
+	// https://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16
+	// Note: this code can probably be optimized (in size and speed).
+	switch {
+	case x < 0, x > 0x10ffff, 0xd800 <= x && x <= 0xdfff:
+		// Invalid Unicode code point: encode U+FFFD instead. This must be
+		// checked first, because rune is signed and a negative value would
+		// otherwise match the single-byte case below.
+		return [4]byte{0xef, 0xbf, 0xbd, 0}, 3
+	case x <= 0x7f:
+		return [4]byte{byte(x), 0, 0, 0}, 1
+	case x <= 0x7ff:
+		b1 := 0xc0 | byte(x>>6)
+		b2 := 0x80 | byte(x&0x3f)
+		return [4]byte{b1, b2, 0, 0}, 2
+	case x <= 0xffff:
+		b1 := 0xe0 | byte(x>>12)
+		b2 := 0x80 | byte((x>>6)&0x3f)
+		b3 := 0x80 | byte((x>>0)&0x3f)
+		return [4]byte{b1, b2, b3, 0}, 3
+	default:
+		// Remaining valid range: U+10000..U+10FFFF.
+		b1 := 0xf0 | byte(x>>18)
+		b2 := 0x80 | byte((x>>12)&0x3f)
+		b3 := 0x80 | byte((x>>6)&0x3f)
+		b4 := 0x80 | byte((x>>0)&0x3f)
+		return [4]byte{b1, b2, b3, b4}, 4
+	}
+}