You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

170 lines
3.4 KiB
C

#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include "lua.h"
#include "lauxlib.h"
/*
 * Return the number of bytes in the UTF-8 sequence introduced by lead byte
 * `c`, or 0 when `c` cannot start a sequence (a continuation byte
 * 0x80..0xBF, or any byte >= 0xF8).
 */
static inline int
_steps(uint8_t c) {
	/* Indexed by the top five bits of the byte; each slot covers 8 values. */
	static const uint8_t length_by_prefix[32] = {
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00..0x7F */
		0, 0, 0, 0, 0, 0, 0, 0,                         /* 0x80..0xBF */
		2, 2, 2, 2,                                     /* 0xC0..0xDF */
		3, 3,                                           /* 0xE0..0xEF */
		4,                                              /* 0xF0..0xF7 */
		0                                               /* 0xF8..0xFF */
	};
	return length_by_prefix[c >> 3];
}
/*
 * Return how many bytes are needed to encode `rune` (1..4), or 0 when
 * `rune` is 0x110000 or greater.
 */
static inline int
_bytes(uint32_t rune) {
	/* Exclusive upper bound of the values that fit in 1, 2, 3 and 4 bytes. */
	static const uint32_t upper_bound[4] = { 0x80, 0x800, 0x10000, 0x110000 };
	int n;

	for (n = 0; n < 4; n++) {
		if (rune < upper_bound[n]) {
			return n + 1;
		}
	}
	return 0;
}
/*
 * Assemble the value of the `step`-byte sequence that starts at str[i].
 *
 * The lead byte contributes its low (8 - step) bits; each of the following
 * (step - 1) bytes contributes its low six bits. No validation is done
 * here: the caller must make sure `step` bytes are readable at str + i.
 */
static inline uint32_t
_decode(const char *str, int i, int step) {
	const char *cursor = str + i;
	uint8_t lead = *cursor;
	uint32_t rune = lead & (0xff >> step);
	int remaining = step - 1;

	while (remaining > 0) {
		cursor++;
		rune = (rune << 6) | (*cursor & 0x3f);
		remaining--;
	}
	return rune;
}
/*
 * Store the low six bits of `rune` at str[pos] as a continuation byte
 * (10xxxxxx) and shift them out of `rune`. Expands to two statements and
 * modifies `rune`, so use it only inside a braced block.
 */
#define FILL_LOW_BITS(str, pos, rune) str[pos] = (rune & 0x3f) | 0x80; rune >>= 6;
/*
 * Write `rune` into `str` as a sequence of `bytes` bytes and return the
 * address just past the bytes written (str + bytes).
 *
 * `bytes` is expected to be 1..4; any value other than 1, 2 or 3 is encoded
 * with the 4-byte layout. `str` must have room for the sequence.
 */
static inline uint8_t *
_encode(uint32_t rune, int bytes, uint8_t *str) {
	uint8_t marker;
	int tail;

	if (bytes == 1) {
		str[0] = rune & 0x7f;
		return str + bytes;
	}

	/* Pick the lead-byte marker and the number of continuation bytes. */
	switch (bytes) {
	case 2:
		marker = 0xc0;
		tail = 1;
		break;
	case 3:
		marker = 0xe0;
		tail = 2;
		break;
	default:
		marker = 0xf0;
		tail = 3;
		break;
	}

	/* Continuation bytes are filled last-to-first, consuming six bits each. */
	for (int pos = tail; pos > 0; pos--) {
		FILL_LOW_BITS(str, pos, rune)
	}
	str[0] = rune | marker;
	return str + bytes;
}
/*
 * Lua: toutf32(str, out) -> count
 *
 * Decodes the UTF-8 string `str` (argument 1) and stores each decoded value
 * in the table `out` (argument 2) at raw indices 1..count, then returns
 * count.
 *
 * Returns no values when a lead byte is invalid or a sequence runs past the
 * end of the string. Values decoded before the error have already been
 * stored in `out` at that point. This function checks only the lead byte and
 * the remaining length of each sequence.
 */
static int
_toutf32(lua_State *L) {
	size_t len;
	const char *str = luaL_checklstring(L, 1, &len);
	luaL_checktype(L, 2, LUA_TTABLE);

	/* size_t index and counter: a Lua string can be longer than INT_MAX. */
	size_t count = 0;
	size_t i = 0;
	while (i < len) {
		int step = _steps((uint8_t)str[i]);
		/* `len - i` cannot wrap because i < len; avoids computing i + step. */
		if (step == 0 || (size_t)step > len - i) {
			return 0;
		}
		/* Pass the advanced pointer so the offset is never narrowed to int. */
		lua_pushinteger(L, _decode(str + i, 0, step));
		count++;
		lua_rawseti(L, 2, count);
		i += (size_t)step;
	}
	lua_pushinteger(L, count);
	return 1;
}
/*
 * Lua: toutf8(runes) -> string
 *
 * Encodes the integers stored at raw indices 1..#runes of the table `runes`
 * (argument 1) as UTF-8 and returns the resulting string.
 *
 * Returns no values when an entry is not convertible to an integer or lies
 * outside 0..0x10FFFF.
 */
static int
_toutf8(lua_State *L) {
	luaL_checktype(L, 1, LUA_TTABLE);
	size_t sz = 0;
	size_t len = lua_rawlen(L, 1);
	size_t i;

	/* Pass 1: validate every entry and add up the output size. */
	for (i = 1; i <= len; i++) {
		lua_rawgeti(L, 1, i);
		int isnum;
		/*
		 * Keep the full integer width until the range check is done:
		 * narrowing to uint32_t first would let values such as
		 * 0x100000041 wrap around and pass as 0x41.
		 */
		long long value = lua_tointegerx(L, -1, &isnum);
		lua_pop(L, 1);
		if (!isnum || value < 0 || value > 0x10FFFF) {
			return 0;
		}
		int bytes = _bytes((uint32_t)value);
		if (!bytes) {
			return 0;
		}
		sz += (size_t)bytes;
	}

	/* Scratch buffer owned by the Lua state; it stays on the stack below the result. */
	uint8_t *str = lua_newuserdata(L, sz);
	uint8_t *tmp = str;

	/* Pass 2: every entry was validated above, so encode directly. */
	for (i = 1; i <= len; i++) {
		lua_rawgeti(L, 1, i);
		uint32_t rune = (uint32_t)lua_tointeger(L, -1);
		lua_pop(L, 1);
		int bytes = _bytes(rune);
		tmp = _encode(rune, bytes, tmp);
	}
	lua_pushlstring(L, (char*)str, sz);
	return 1;
}
/*
 * Lua: len(str) -> count
 *
 * Returns the number of UTF-8 sequences in `str` (argument 1), counted by
 * lead byte. Returns no values when a lead byte is invalid or a sequence
 * runs past the end of the string.
 */
static int
_len(lua_State *L) {
	size_t len;
	const char *str = luaL_checklstring(L, 1, &len);

	/* size_t index and counter: a Lua string can be longer than INT_MAX. */
	size_t count = 0;
	size_t i = 0;
	while (i < len) {
		int step = _steps((uint8_t)str[i]);
		/* `len - i` cannot wrap because i < len; avoids computing i + step. */
		if (step == 0 || (size_t)step > len - i) {
			return 0;
		}
		i += (size_t)step;
		count++;
	}
	lua_pushinteger(L, count);
	return 1;
}
/*
 * Library loader: checks the Lua version, creates a new table holding the
 * functions "len", "toutf32" and "toutf8", and returns that table.
 */
int
luaopen_utf8_c(lua_State *L) {
	luaL_Reg exports[] = {
		{ "len", _len },
		{ "toutf32", _toutf32 },
		{ "toutf8", _toutf8 },
		{ NULL, NULL },
	};

	luaL_checkversion(L);
	luaL_newlib(L, exports);
	return 1;
}