Change LString to use an array of bytes instead of wrapping Java's String.

This brings our implementation more in line with regular C Lua.
This commit is contained in:
Ian Farmer
2007-09-10 06:27:54 +00:00
parent aa44eedf4b
commit aeafca11b6
17 changed files with 274 additions and 84 deletions

View File

@@ -152,20 +152,22 @@ public class LuaCompat extends LFunction {
// String functions // String functions
case REP: { case REP: {
String s = vm.getArgAsString( 0 ); LString s = vm.getArgAsLuaString( 0 );
int n = vm.getArgAsInt( 1 ); int n = vm.getArgAsInt( 1 );
if ( n >= 0 ) { if ( n >= 0 ) {
StringBuffer sb = new StringBuffer( s.length() * n ); final byte[] bytes = new byte[ s.length() * n ];
for ( int i = 0; i < n; ++i ) { int len = s.length();
sb.append( s ); for ( int offset = 0; offset < bytes.length; offset += len ) {
s.copyInto( 0, bytes, offset, len );
} }
vm.setResult( new LString( sb.toString() ) );
vm.setResult( new LString( bytes ) );
} else { } else {
vm.setResult( LNil.NIL ); vm.setResult( LNil.NIL );
} }
} break; } break;
case SUB: { case SUB: {
String s = vm.getArgAsString( 0 ); final LString s = vm.getArgAsLuaString( 0 );
final int len = s.length(); final int len = s.length();
int i = vm.getArgAsInt( 1 ); int i = vm.getArgAsInt( 1 );
@@ -184,8 +186,8 @@ public class LuaCompat extends LFunction {
j = Math.min( Math.max( i, j ), len ); j = Math.min( Math.max( i, j ), len );
} }
String result = s.substring( i, j ); LString result = s.substring( i, j );
vm.setResult( new LString( result ) ); vm.setResult( result );
} break; } break;
default: default:
@@ -298,7 +300,7 @@ public class LuaCompat extends LFunction {
String script; String script;
if ( fileName != null ) { if ( fileName != null ) {
script = fileName.luaAsString(); script = fileName.luaAsString().toJavaString();
is = getClass().getResourceAsStream( "/"+script ); is = getClass().getResourceAsStream( "/"+script );
} else { } else {
is = System.in; is = System.in;

View File

@@ -65,7 +65,7 @@ public class CoerceLuaToJava {
}; };
Coercion stringCoercion = new Coercion() { Coercion stringCoercion = new Coercion() {
public Object coerce(LValue value) { public Object coerce(LValue value) {
return value.luaAsString(); return value.luaAsString().toJavaString();
} }
public int score(LValue value) { public int score(LValue value) {
if ( value instanceof LUserData ) if ( value instanceof LUserData )
@@ -78,7 +78,7 @@ public class CoerceLuaToJava {
if ( value instanceof LUserData ) if ( value instanceof LUserData )
return ((LUserData)value).m_instance; return ((LUserData)value).m_instance;
if ( value instanceof LString ) if ( value instanceof LString )
return value.luaAsString(); return value.luaAsString().toJavaString();
if ( value instanceof LInteger ) if ( value instanceof LInteger )
return Integer.valueOf(value.luaAsInt()); return Integer.valueOf(value.luaAsInt());
if ( value instanceof LDouble ) if ( value instanceof LDouble )

View File

@@ -17,6 +17,7 @@ import java.util.Map;
import lua.GlobalState; import lua.GlobalState;
import lua.VM; import lua.VM;
import lua.value.LFunction; import lua.value.LFunction;
import lua.value.LString;
import lua.value.LTable; import lua.value.LTable;
import lua.value.LUserData; import lua.value.LUserData;
import lua.value.LValue; import lua.value.LValue;
@@ -121,7 +122,7 @@ public final class LuaJava extends LFunction {
this.clazz = clazz; this.clazz = clazz;
} }
public void luaGetTable(VM vm, LValue table, LValue key) { public void luaGetTable(VM vm, LValue table, LValue key) {
final String s = key.luaAsString(); final String s = key.luaAsString().toJavaString();
try { try {
Field f = clazz.getField(s); Field f = clazz.getField(s);
Object o = f.get(m_instance); Object o = f.get(m_instance);
@@ -135,7 +136,7 @@ public final class LuaJava extends LFunction {
} }
public void luaSetTable(VM vm, LValue table, LValue key, LValue val) { public void luaSetTable(VM vm, LValue table, LValue key, LValue val) {
Class c = m_instance.getClass(); Class c = m_instance.getClass();
String s = key.luaAsString(); String s = key.luaAsString().toJavaString();
try { try {
Field f = c.getField(s); Field f = c.getField(s);
Object v = CoerceLuaToJava.coerceArg(val, f.getType()); Object v = CoerceLuaToJava.coerceArg(val, f.getType());

View File

@@ -1,6 +1,7 @@
package lua; package lua;
import lua.io.Closure; import lua.io.Closure;
import lua.value.LString;
import lua.value.LValue; import lua.value.LValue;
public interface VM { public interface VM {
@@ -114,6 +115,13 @@ public interface VM {
*/ */
public String getArgAsString( int index ); public String getArgAsString( int index );
/**
* Get the index-th argument as an LString value, or "" if fewer than index arguments were supplied.
* @param index position of the requested argument (presumably counted the same
*              way as getArgAsString - confirm against the implementing class)
* @return the argument as an LString; the empty string when the argument was not supplied
*/
public LString getArgAsLuaString( int index );
/** Set top to base in preparation for pushing return values. /** Set top to base in preparation for pushing return values.
* Can be used when returning no values. * Can be used when returning no values.
* *

View File

@@ -99,8 +99,7 @@ public class LoadState {
return null; return null;
byte[] bytes = new byte[size]; byte[] bytes = new byte[size];
is.readFully( bytes ); is.readFully( bytes );
String s = new String( bytes, 0, size-1 ); return new LString( bytes, 0, bytes.length - 1 );
return new LString( s );
} }
static LNumber longBitsToLuaNumber( long bits ) { static LNumber longBitsToLuaNumber( long bits ) {

View File

@@ -8,15 +8,15 @@ public final class LBoolean extends LValue {
public static final LString TYPE_NAME = new LString("boolean"); public static final LString TYPE_NAME = new LString("boolean");
private final String m_name; private final LString m_name;
private final boolean m_value; private final boolean m_value;
private LBoolean( String name, boolean value ) { private LBoolean( String name, boolean value ) {
this.m_name = name; this.m_name = new LString( name );
this.m_value = value; this.m_value = value;
} }
public final String luaAsString() { public final LString luaAsString() {
return m_name; return m_name;
} }

View File

@@ -14,8 +14,8 @@ public class LDouble extends LNumber {
return (int) m_value; return (int) m_value;
} }
public String luaAsString() { public LString luaAsString() {
return String.valueOf(m_value); return LString.valueOf( m_value );
} }
public boolean isInteger() { public boolean isInteger() {

View File

@@ -7,8 +7,8 @@ public class LFunction extends LValue {
public static final LString TYPE_NAME = new LString("function"); public static final LString TYPE_NAME = new LString("function");
public String luaAsString() { public LString luaAsString() {
return "function: "+hashCode(); return new LString( "function: "+hashCode() );
} }
public void luaSetTable(VM vm, LValue table, LValue key, LValue val) { public void luaSetTable(VM vm, LValue table, LValue key, LValue val) {

View File

@@ -22,8 +22,8 @@ public class LInteger extends LNumber {
return m_value; return m_value;
} }
public String luaAsString() { public LString luaAsString() {
return String.valueOf(m_value); return LString.valueOf(m_value);
} }
public boolean isInteger() { public boolean isInteger() {

View File

@@ -4,8 +4,8 @@ public final class LNil extends LValue {
public static final LNil NIL = new LNil(); public static final LNil NIL = new LNil();
public static final LString TYPE_NAME = new LString("nil"); public static final LString TYPE_NAME = new LString("nil");
public final String luaAsString() { public final LString luaAsString() {
return "nil"; return TYPE_NAME;
} }
public boolean luaAsBoolean() { public boolean luaAsBoolean() {

View File

@@ -1,42 +1,175 @@
package lua.value; package lua.value;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import lua.Lua; import lua.Lua;
/**
* A String implementation for Lua using bytes instead of chars.
*
* This should have the following advantages:
*
* (1) We can use strings as byte buffers, as Lua does, and therefore avoid
* questions about how to adapt Lua APIs that use strings with binary data.
*
* (2) Half the memory usage when strings are primarily ASCII
*
*
* TODO: Decide if/when to copy the bytes to a new array to ensure memory does
* not "leak" in the form of unused portions of byte arrays. Currently, for
* efficiency, new LStrings and substrings never create copies.
*/
public class LString extends LValue { public class LString extends LValue {
public static final LString TYPE_NAME = new LString("string"); public static final LString TYPE_NAME = new LString("string");
final String m_string; final byte[] m_bytes;
final int m_offset;
final int m_length;
final int m_hash; final int m_hash;
private static LTable s_stringMT; private static LTable s_stringMT;
/**
* Construct a Lua string from the given Java string. Characters are encoded
* using UTF-8.
*/
public LString(String string) { public LString(String string) {
this.m_string = string; byte[] bytes;
this.m_hash = string.hashCode(); try {
bytes = string.getBytes( "UTF-8" );
} catch ( UnsupportedEncodingException exn ) {
bytes = stringToUtf8Bytes( string );
}
this.m_bytes = bytes;
this.m_offset = 0;
this.m_length = m_bytes.length;
this.m_hash = hashBytes( m_bytes, 0, m_length );
} }
/**
* Construct a string from the given byte array.
*
* new LString(b) is identical to new LString(b, 0, b.length): the array is
* handed to that constructor as-is, covering its whole length.
*
* @param bytes the bytes that make up the string
*/
public LString(byte[] bytes) {
this( bytes, 0, bytes.length );
}
/**
 * Construct a string from the given byte array and range. For efficiency,
 * the byte array is not copied. Lua strings are immutable so the bytes must
 * not be modified after the string is constructed.
 *
 * @param bytes backing array; stored by reference, not copied
 * @param off index within bytes of the first byte of the string
 * @param len number of bytes in the string
 * @throws IndexOutOfBoundsException if off or len is negative, or if the
 *         range starting at off with len bytes does not fit inside bytes
 */
public LString(byte[] bytes, int off, int len) {
    // Compare off against (bytes.length - len) instead of computing off+len:
    // the sum can overflow to a negative int and slip past the check, while
    // the subtraction cannot overflow once both values are known non-negative.
    if ( off < 0 || len < 0 || off > bytes.length - len )
        throw new IndexOutOfBoundsException();
    this.m_bytes = bytes;
    this.m_offset = off;
    this.m_length = len;
    this.m_hash = hashBytes( bytes, off, len );
}
public boolean equals(Object o) { public boolean equals(Object o) {
if ( o != null && o instanceof LString ) { if ( o != null && o instanceof LString ) {
LString s = (LString) o; LString s = (LString) o;
return m_hash == s.m_hash && m_string.equals(s.m_string); return m_hash == s.m_hash &&
m_length == s.m_length &&
( ( m_bytes == s.m_bytes && m_offset == s.m_offset ) ||
equals( m_bytes, m_offset, s.m_bytes, s.m_offset, m_length ) );
} }
return false; return false;
} }
/**
 * Compare this string with another one byte by byte, treating every byte
 * as an unsigned value (0..255).
 *
 * @param o the string to compare against
 * @return a negative value, zero, or a positive value when this string
 *         sorts before, equal to, or after o; if one string is a prefix of
 *         the other, the shorter one sorts first
 */
public int compareTo( LString o ) {
    final byte[] a = this.m_bytes;
    final byte[] b = o.m_bytes;
    int i = this.m_offset;
    int j = o.m_offset;
    final int imax = i + m_length;
    final int jmax = j + o.m_length;

    // Same backing array and same range: equal without scanning.
    if ( a == b && i == j && imax == jmax )
        return 0;

    while ( i < imax && j < jmax ) {
        // a is indexed by i and b by j; the two strings may start at
        // different offsets inside their backing arrays, so the indices
        // must not be mixed.
        if ( a[i] != b[j] ) {
            return ( a[i] & 0x0FF ) - ( b[j] & 0x0FF );
        }
        i++;
        j++;
    }

    // All shared positions match: order by length.
    return m_length - o.m_length;
}
public int hashCode() { public int hashCode() {
return m_hash; return m_hash;
} }
/**
* Return the length of this string, counted in bytes.
*/
public int length() {
return m_length;
}
/**
 * Return the part of this string from beginIndex (inclusive) to endIndex
 * (exclusive). The result shares this string's backing array; no bytes are
 * copied.
 *
 * @param beginIndex index of the first byte to include, relative to the
 *                   start of this string
 * @param endIndex index just past the last byte to include
 * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
 *         greater than length(), or beginIndex is greater than endIndex
 */
public LString substring( int beginIndex, int endIndex ) {
    // Validate against this string's own range. The backing array may be
    // larger than the string (substrings share it), so relying on the
    // array-level check alone could expose bytes that belong to a neighbour.
    if ( beginIndex < 0 || endIndex > m_length || beginIndex > endIndex )
        throw new IndexOutOfBoundsException();
    return new LString( m_bytes, m_offset + beginIndex, endIndex - beginIndex );
}
/**
 * Build an LString holding Java's standard text form of the given double.
 */
public static LString valueOf( double d ) {
    final String text = Double.toString( d );
    return new LString( text );
}
/**
 * Build an LString holding the decimal text form of the given int.
 */
public static LString valueOf( int x ) {
    final String text = Integer.toString( x );
    return new LString( text );
}
/**
 * Join the given strings, in array order, into one new LString backed by a
 * freshly allocated byte array.
 */
public static LString concat( final LString[] strings ) {
    final int count = strings.length;

    // First pass: size of the result.
    int total = 0;
    for ( int k = 0; k < count; k++ ) {
        total += strings[k].length();
    }

    // Second pass: copy each piece in directly behind the previous one.
    final byte[] joined = new byte[total];
    int pos = 0;
    for ( int k = 0; k < count; k++ ) {
        final LString piece = strings[k];
        final int n = piece.length();
        System.arraycopy( piece.m_bytes, piece.m_offset, joined, pos, n );
        pos += n;
    }
    return new LString( joined );
}
/**
 * Write the specified substring of this string to the given output stream.
 *
 * @param os stream that receives the bytes
 * @param offset index of the first byte to write, relative to the start of
 *               this string
 * @param len number of bytes to write
 * @throws IndexOutOfBoundsException if offset or len is negative, or the
 *         requested range extends past the end of this string
 * @throws IOException if the stream rejects the write
 */
public void write( OutputStream os, int offset, int len ) throws IOException {
    // offset > m_length - len is the overflow-safe form of
    // offset + len > m_length (the sum can wrap around to a negative int).
    if ( offset < 0 || len < 0 || offset > m_length - len )
        throw new IndexOutOfBoundsException();
    os.write( m_bytes, m_offset+offset, len );
}
/**
 * Copy the bytes of the string into the given byte array.
 *
 * @param strOffset index of the first byte to copy, relative to the start
 *                  of this string
 * @param bytes destination array
 * @param arrayOffset index in bytes at which the first copied byte is stored
 * @param len number of bytes to copy
 * @throws IndexOutOfBoundsException if strOffset or len is negative, if the
 *         requested range extends past the end of this string, or if it
 *         does not fit in the destination (the latter raised by
 *         System.arraycopy)
 */
public void copyInto( int strOffset, byte[] bytes, int arrayOffset, int len ) {
    // The backing array can be larger than this string, so check the
    // string's own range here; System.arraycopy only knows about the array.
    // Written as a subtraction so the comparison cannot overflow.
    if ( strOffset < 0 || len < 0 || strOffset > m_length - len )
        throw new IndexOutOfBoundsException();
    System.arraycopy( m_bytes, m_offset+strOffset, bytes, arrayOffset, len );
}
public boolean luaBinCmpUnknown(int opcode, LValue lhs) { public boolean luaBinCmpUnknown(int opcode, LValue lhs) {
return lhs.luaBinCmpString(opcode, m_string); return lhs.luaBinCmpString(opcode, this);
} }
public boolean luaBinCmpString(int opcode, String rhs) { public boolean luaBinCmpString(int opcode, LString rhs) {
switch ( opcode ) { switch ( opcode ) {
case Lua.OP_EQ: return m_string.equals(rhs); case Lua.OP_EQ: return equals(rhs);
case Lua.OP_LT: return m_string.compareTo(rhs) < 0; case Lua.OP_LT: return compareTo(rhs) < 0;
case Lua.OP_LE: return m_string.compareTo(rhs) <= 0; case Lua.OP_LE: return compareTo(rhs) <= 0;
} }
luaUnsupportedOperation(); luaUnsupportedOperation();
return false; return false;
@@ -64,7 +197,7 @@ public class LString extends LValue {
public LValue luaToNumber( int base ) { public LValue luaToNumber( int base ) {
if ( base >= 2 && base <= 36 ) { if ( base >= 2 && base <= 36 ) {
String str = m_string.trim(); String str = toJavaString().trim();
try { try {
return new LInteger( Integer.parseInt( str, base ) ); return new LInteger( Integer.parseInt( str, base ) );
} catch ( NumberFormatException nfe ) { } catch ( NumberFormatException nfe ) {
@@ -80,13 +213,21 @@ public class LString extends LValue {
return LNil.NIL; return LNil.NIL;
} }
public String luaAsString() { public LString luaAsString() {
return m_string; return this;
} }
/**
 * Decode this string's bytes as UTF-8 and return the result as a Java String.
 *
 * @throws RuntimeException if the platform reports the UTF-8 charset as
 *         unsupported
 */
public String toJavaString() {
    String decoded;
    try {
        decoded = new String( m_bytes, m_offset, m_length, "UTF-8" );
    } catch ( UnsupportedEncodingException uee ) {
        throw new RuntimeException("toJavaString: UTF-8 decoding not implemented");
    }
    return decoded;
}
/** Built-in opcode LEN, for Strings and Tables */ /** Built-in opcode LEN, for Strings and Tables */
public LValue luaLength() { public LValue luaLength() {
return new LInteger( m_string.length() ); return new LInteger( length() );
} }
public LString luaGetType() { public LString luaGetType() {
@@ -112,4 +253,72 @@ public class LString extends LValue {
} }
return s_stringMT; return s_stringMT;
} }
/**
 * Check whether n bytes of a starting at index i match n bytes of b
 * starting at index j.
 *
 * @return true when every compared pair of bytes is equal (including the
 *         case where nothing is compared), false at the first mismatch
 */
public static boolean equals( byte[] a, int i, byte[] b, int j, int n ) {
    final int aEnd = i + n;
    final int bEnd = j + n;
    for ( int p = i, q = j; p < aEnd && q < bEnd; ++p, ++q ) {
        if ( a[p] != b[q] ) {
            return false;
        }
    }
    return true;
}
/**
 * Compute the hash of the given range of bytes.
 * This code comes right out of Lua 5.1.2 (translated from C to Java).
 *
 * NOTE(review): in the C original the accumulator is unsigned, so its
 * right shift is a logical one; Java's >> below is arithmetic. The result
 * is still a deterministic hash, but it may differ from C Lua's value once
 * the accumulator goes negative - confirm whether parity matters.
 */
private static int hashBytes( byte[] bytes, int offset, int length ) {
    // Sampling stride: long strings are not hashed byte-for-byte.
    final int stride = ( length >> 5 ) + 1;
    int hash = length; // the length is the seed
    int remaining = length;
    // Walk backwards from the last byte, one stride at a time.
    while ( remaining >= stride ) {
        final int unsignedByte = bytes[offset + remaining - 1] & 0x0FF;
        hash ^= ( hash << 5 ) + ( hash >> 2 ) + unsignedByte;
        remaining -= stride;
    }
    return hash;
}
/**
 * Encode a Java string as UTF-8 by hand, without relying on the platform's
 * charset support.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one supplementary code point and written as a single 4-byte sequence, as
 * UTF-8 requires. An unpaired surrogate is written as a 3-byte sequence.
 *
 * @param string the text to encode
 * @return a new array holding exactly the encoded bytes
 */
private static byte[] stringToUtf8Bytes( final String string ) {
    final int strlen = string.length();
    // Start with one byte per char (exact for pure ASCII); grown on demand.
    byte[] bytes = new byte[ strlen ];
    byte b1 = 0, b2 = 0, b3 = 0, b4 = 0;
    int j = 0;
    for ( int i = 0; i < strlen; ++i ) {
        int c = string.charAt( i );

        // Combine a surrogate pair into the code point it represents.
        if ( c >= 0xD800 && c <= 0xDBFF && i + 1 < strlen ) {
            final int low = string.charAt( i + 1 );
            if ( low >= 0xDC00 && low <= 0xDFFF ) {
                c = 0x10000 + ( ( c - 0xD800 ) << 10 ) + ( low - 0xDC00 );
                ++i; // the low surrogate has been consumed
            }
        }

        int count;
        if ( c > 0xFFFF ) {
            count = 4;
            b4 = (byte)( 0xF0 | ( c >> 18 ) );
            b3 = (byte)( 0x80 | ( ( c >> 12 ) & 0x03F ) );
            b2 = (byte)( 0x80 | ( ( c >> 6 ) & 0x03F ) );
            b1 = (byte)( 0x80 | ( c & 0x03F ) );
        } else if ( c > 0x07FF ) {
            count = 3;
            b3 = (byte)( 0xE0 | ( c >> 12 ) );
            b2 = (byte)( 0x80 | ( ( c >> 6 ) & 0x03F ) );
            b1 = (byte)( 0x80 | ( c & 0x03F ) );
        } else if ( c > 0x07F ) {
            count = 2;
            b2 = (byte)( 0xC0 | ( c >> 6 ) );
            b1 = (byte)( 0x80 | ( c & 0x03F ) );
        } else {
            count = 1;
            b1 = (byte) c;
        }

        if ( j + count > bytes.length ) {
            bytes = realloc( bytes, ( j + count ) * 2 );
        }

        // Deliberate fall-through: a sequence of N bytes writes its lead
        // byte first and then every continuation byte below it.
        switch ( count ) {
        case 4:
            bytes[j++] = b4;
        case 3:
            bytes[j++] = b3;
        case 2:
            bytes[j++] = b2;
        case 1:
            bytes[j++] = b1;
        }
    }

    // Trim (or keep) so the returned array is exactly the encoded length.
    if ( j != bytes.length ) {
        bytes = realloc( bytes, j );
    }
    return bytes;
}
/**
 * Return a new array of newSize bytes that starts with as much of a as
 * fits; any extra space is left zero-filled. The input array is not modified.
 */
private static byte[] realloc( byte[] a, int newSize ) {
    final byte[] resized = new byte[ newSize ];
    final int keep = ( a.length < newSize ) ? a.length : newSize;
    System.arraycopy( a, 0, resized, 0, keep );
    return resized;
}
} }

View File

@@ -128,16 +128,7 @@ public class LTable extends LValue {
* initializing a table. Bypasses the metatable, if any. * initializing a table. Bypasses the metatable, if any.
*/ */
public void put( String key, LValue value ) { public void put( String key, LValue value ) {
if (value == null || value == LNil.NIL) { put( new LString( key ), value );
remove( key );
return;
}
if (checkLoadFactor())
rehash();
int slot = findSlot( key );
if (fillHashSlot( slot, value ))
return;
m_hashKeys[slot] = new LString( key );
} }
/** /**
@@ -267,8 +258,8 @@ public class LTable extends LValue {
(LTable) metatable : null; (LTable) metatable : null;
} }
public String luaAsString() { public LString luaAsString() {
return "table: "+id(); return new LString("table: "+id());
} }
public LString luaGetType() { public LString luaGetType() {
@@ -334,13 +325,6 @@ public class LTable extends LValue {
} }
} }
private void remove( String key ) {
if ( m_hashKeys != null ) {
int slot = findSlot( key );
clearSlot( slot );
}
}
private void remove( LValue key ) { private void remove( LValue key ) {
if ( m_hashKeys != null ) { if ( m_hashKeys != null ) {
int slot = findSlot( key ); int slot = findSlot( key );
@@ -385,20 +369,6 @@ public class LTable extends LValue {
} }
return i; return i;
} }
private int findSlot( String key ) {
// NOTE: currently LString uses the String's hashCode.
int i = hashToIndex( key.hashCode() );
// This loop is guaranteed to terminate as long as we never allow the
// table to get 100% full.
LValue k;
while ( ( k = m_hashKeys[i] ) != null &&
!k.luaBinCmpString( Lua.OP_EQ, key ) ) {
i = ( i + 1 ) % m_hashKeys.length;
}
return i;
}
private int findSlot( int key ) { private int findSlot( int key ) {
int i = hashToIndex( LInteger.hashCodeOf( key ) ); int i = hashToIndex( LInteger.hashCodeOf( key ) );

View File

@@ -7,4 +7,7 @@ public class LThread extends LValue {
return TYPE_NAME; return TYPE_NAME;
} }
/**
 * Describe this thread as the text "thread: " followed by its hash code.
 */
public LString luaAsString() {
    final String description = "thread: " + hashCode();
    return new LString( description );
}
} }

View File

@@ -10,8 +10,8 @@ public class LUserData extends LValue {
m_instance = obj; m_instance = obj;
} }
public String luaAsString() { public LString luaAsString() {
return m_instance.toString(); return new LString(m_instance.toString());
} }
public boolean equals(Object obj) { public boolean equals(Object obj) {

View File

@@ -61,7 +61,7 @@ public class LValue {
} }
// unsupported except for strings // unsupported except for strings
public boolean luaBinCmpString(int opcode, String rhs) { public boolean luaBinCmpString(int opcode, LString rhs) {
if ( opcode == Lua.OP_EQ ) if ( opcode == Lua.OP_EQ )
return false; return false;
luaUnsupportedOperation(); luaUnsupportedOperation();
@@ -122,13 +122,11 @@ public class LValue {
/** Get the value as a String /** Get the value as a String
*/ */
public String luaAsString() { public abstract LString luaAsString();
return super.toString();
}
/** Override standard toString with lua String conversion by default */ /** Override standard toString with lua String conversion by default */
public String toString() { public String toString() {
return luaAsString(); return luaAsString().toJavaString();
} }
/** Return value as an integer */ /** Return value as an integer */

View File

@@ -93,7 +93,7 @@ public class StandardTest extends TestCase {
CallInfo call = state.calls[i]; CallInfo call = state.calls[i];
Proto p = call.closure.p; Proto p = call.closure.p;
int line = p.lineinfo[call.pc]; int line = p.lineinfo[call.pc];
String func = call.closure.luaAsString(); String func = call.closure.luaAsString().toJavaString();
stackTrace[state.cc - i] = new StackTraceElement(getName(), func, getName()+".lua", line ); stackTrace[state.cc - i] = new StackTraceElement(getName(), func, getName()+".lua", line );
} }

View File

@@ -72,7 +72,7 @@ public class LTableTest extends TestCase {
assertTrue( ( intKeys & mask ) == 0 ); assertTrue( ( intKeys & mask ) == 0 );
intKeys |= mask; intKeys |= mask;
} else if ( k instanceof LString ) { } else if ( k instanceof LString ) {
final int ik = Integer.parseInt( k.luaAsString() ); final int ik = Integer.parseInt( k.luaAsString().toJavaString() );
assertEquals( String.valueOf( ik ), k.luaAsString() ); assertEquals( String.valueOf( ik ), k.luaAsString() );
assertTrue( ik >= 0 && ik < 10 ); assertTrue( ik >= 0 && ik < 10 );
final int mask = 1 << ik; final int mask = 1 << ik;