Started with upgrading to Lua 5.3

This commit is contained in:
UnlegitDqrk
2026-03-01 21:42:37 +01:00
parent 493b055a26
commit 1d3459e0d3

View File

@@ -0,0 +1,264 @@
package org.luaj.vm2.libs;
import org.luaj.vm2.LuaError;
import org.luaj.vm2.LuaString;
import org.luaj.vm2.LuaTable;
import org.luaj.vm2.LuaValue;
import org.luaj.vm2.Varargs;
/**
* Lua 5.3 utf8 library.
*/
public class Utf8Lib extends TwoArgFunction {
private static final LuaString CHARPATTERN = LuaValue.valueOf(
"[\u0000-\u007F\u00C2-\u00F4][\u0080-\u00BF]*");
public LuaValue call(LuaValue modname, LuaValue env) {
LuaTable utf8 = new LuaTable();
utf8.set("char", new utf8_char());
utf8.set("codes", new codes());
utf8.set("codepoint", new codepoint());
utf8.set("len", new len());
utf8.set("offset", new offset());
utf8.set("charpattern", CHARPATTERN);
env.set("utf8", utf8);
if (!env.get("package").isnil()) {
env.get("package").get("loaded").set("utf8", utf8);
}
return utf8;
}
static final class utf8_char extends VarArgFunction {
public Varargs invoke(Varargs args) {
int n = args.narg();
if (n == 0) {
return LuaValue.EMPTYSTRING;
}
StringBuilder builder = new StringBuilder();
for (int i = 1; i <= n; i++) {
long codepoint = args.checklong(i);
appendCodePoint(builder, codepoint);
}
return LuaValue.valueOf(builder.toString());
}
}
static final class codepoint extends VarArgFunction {
public Varargs invoke(Varargs args) {
LuaString s = args.checkstring(1);
int len = s.rawlen();
int start = relativeIndex(args.optint(2, 1), len);
int end = relativeIndex(args.optint(3, start), len);
if (start < 1 || start > len + 1) {
argerror(2, "out of range");
}
if (end < start - 1 || end > len) {
argerror(3, "out of range");
}
if (start > end) {
return NONE;
}
LuaValue[] values = new LuaValue[end - start + 1];
int count = 0;
int pos = start - 1;
int limit = end;
while (pos < limit) {
Decoded decoded = decode(s, pos);
if (decoded.next > limit) {
throw new LuaError("invalid UTF-8 code");
}
values[count++] = LuaValue.valueOf(decoded.codepoint);
pos = decoded.next;
}
if (pos != limit) {
throw new LuaError("invalid UTF-8 code");
}
return LuaValue.varargsOf(values);
}
}
static final class len extends VarArgFunction {
public Varargs invoke(Varargs args) {
LuaString s = args.checkstring(1);
int len = s.rawlen();
int start = relativeIndex(args.optint(2, 1), len);
int end = relativeIndex(args.optint(3, -1), len);
if (start < 1 || start > len + 1) {
argerror(2, "out of range");
}
if (end < start - 1 || end > len) {
argerror(3, "out of range");
}
int pos = start - 1;
int count = 0;
int limit = end;
while (pos < limit) {
try {
Decoded decoded = decode(s, pos);
if (decoded.next > limit) {
return LuaValue.varargsOf(new LuaValue[] { NIL, LuaValue.valueOf(pos + 1) });
}
pos = decoded.next;
count++;
} catch (LuaError e) {
return LuaValue.varargsOf(new LuaValue[] { NIL, LuaValue.valueOf(pos + 1) });
}
}
if (pos != limit) {
return LuaValue.varargsOf(new LuaValue[] { NIL, LuaValue.valueOf(pos + 1) });
}
return LuaValue.valueOf(count);
}
}
static final class offset extends VarArgFunction {
public Varargs invoke(Varargs args) {
LuaString s = args.checkstring(1);
int n = args.checkint(2);
int len = s.rawlen();
int i = args.narg() >= 3 ? relativeIndex(args.checkint(3), len) : (n >= 0 ? 1 : len + 1);
if (i < 1 || i > len + 1) {
argerror(3, "position out of range");
}
if (n == 0) {
if (i == len + 1) {
if (len == 0) {
return NIL;
}
i = len;
}
while (i > 1 && isContinuation(s.luaByte(i - 1))) {
i--;
}
if (i <= len && isContinuation(s.luaByte(i - 1))) {
throw new LuaError("initial position is a continuation byte");
}
return LuaValue.valueOf(i);
}
int pos = i;
if (n > 0) {
pos--;
while (n > 0) {
if (pos >= len) {
return NIL;
}
Decoded decoded = decode(s, pos);
pos = decoded.next;
n--;
}
return LuaValue.valueOf(pos + 1);
}
pos--;
while (n < 0) {
if (pos <= 0) {
return NIL;
}
pos--;
while (pos > 0 && isContinuation(s.luaByte(pos))) {
pos--;
}
if (isContinuation(s.luaByte(pos))) {
throw new LuaError("invalid UTF-8 code");
}
n++;
}
return LuaValue.valueOf(pos + 1);
}
}
static final class codes extends VarArgFunction {
public Varargs invoke(Varargs args) {
LuaValue arg = args.arg1();
LuaString s = arg.checkstring();
return LuaValue.varargsOf(new LuaValue[] { new codes_iter(s), s, LuaValue.ZERO });
}
}
static final class codes_iter extends VarArgFunction {
private final LuaString s;
codes_iter(LuaString s) {
this.s = s;
}
public Varargs invoke(Varargs args) {
int index = args.arg(2).optint(0);
if (index < 0 || index > s.rawlen()) {
return NONE;
}
if (index == s.rawlen()) {
return NONE;
}
Decoded decoded = decode(s, index);
return LuaValue.varargsOf(new LuaValue[] {
LuaValue.valueOf(index + 1),
LuaValue.valueOf(decoded.codepoint)
});
}
}
private static int relativeIndex(int index, int len) {
return index >= 0 ? index : len + index + 1;
}
private static boolean isContinuation(int b) {
return (b & 0xC0) == 0x80;
}
private static Decoded decode(LuaString s, int pos) {
int first = s.luaByte(pos);
if (first < 0x80) {
return new Decoded(first, pos + 1);
}
int needed;
int minCodePoint;
int codepoint;
if ((first & 0xE0) == 0xC0) {
needed = 1;
minCodePoint = 0x80;
codepoint = first & 0x1F;
} else if ((first & 0xF0) == 0xE0) {
needed = 2;
minCodePoint = 0x800;
codepoint = first & 0x0F;
} else if ((first & 0xF8) == 0xF0) {
needed = 3;
minCodePoint = 0x10000;
codepoint = first & 0x07;
} else {
throw new LuaError("invalid UTF-8 code");
}
if (pos + needed >= s.rawlen()) {
throw new LuaError("invalid UTF-8 code");
}
for (int i = 1; i <= needed; i++) {
int b = s.luaByte(pos + i);
if (!isContinuation(b)) {
throw new LuaError("invalid UTF-8 code");
}
codepoint = (codepoint << 6) | (b & 0x3F);
}
if (codepoint < minCodePoint || codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
throw new LuaError("invalid UTF-8 code");
}
return new Decoded(codepoint, pos + needed + 1);
}
private static void appendCodePoint(StringBuilder builder, long codepoint) {
if (codepoint < 0 || codepoint > 0x10FFFFL || (codepoint >= 0xD800L && codepoint <= 0xDFFFL)) {
throw new LuaError("value out of range");
}
builder.appendCodePoint((int) codepoint);
}
private static final class Decoded {
final int codepoint;
final int next;
Decoded(int codepoint, int next) {
this.codepoint = codepoint;
this.next = next;
}
}
}