diff --git a/src/addon/java/lua/addon/luacompat/LuaCompat.java b/src/addon/java/lua/addon/luacompat/LuaCompat.java index 20c8e386..2a35dc2b 100644 --- a/src/addon/java/lua/addon/luacompat/LuaCompat.java +++ b/src/addon/java/lua/addon/luacompat/LuaCompat.java @@ -152,20 +152,22 @@ public class LuaCompat extends LFunction { // String functions case REP: { - String s = vm.getArgAsString( 0 ); + LString s = vm.getArgAsLuaString( 0 ); int n = vm.getArgAsInt( 1 ); if ( n >= 0 ) { - StringBuffer sb = new StringBuffer( s.length() * n ); - for ( int i = 0; i < n; ++i ) { - sb.append( s ); + final byte[] bytes = new byte[ s.length() * n ]; + int len = s.length(); + for ( int offset = 0; offset < bytes.length; offset += len ) { + s.copyInto( 0, bytes, offset, len ); } - vm.setResult( new LString( sb.toString() ) ); + + vm.setResult( new LString( bytes ) ); } else { vm.setResult( LNil.NIL ); } } break; case SUB: { - String s = vm.getArgAsString( 0 ); + final LString s = vm.getArgAsLuaString( 0 ); final int len = s.length(); int i = vm.getArgAsInt( 1 ); @@ -184,8 +186,8 @@ public class LuaCompat extends LFunction { j = Math.min( Math.max( i, j ), len ); } - String result = s.substring( i, j ); - vm.setResult( new LString( result ) ); + LString result = s.substring( i, j ); + vm.setResult( result ); } break; default: @@ -298,7 +300,7 @@ public class LuaCompat extends LFunction { String script; if ( fileName != null ) { - script = fileName.luaAsString(); + script = fileName.luaAsString().toJavaString(); is = getClass().getResourceAsStream( "/"+script ); } else { is = System.in; diff --git a/src/addon/java/lua/addon/luajava/CoerceLuaToJava.java b/src/addon/java/lua/addon/luajava/CoerceLuaToJava.java index 7354bddd..9755c853 100644 --- a/src/addon/java/lua/addon/luajava/CoerceLuaToJava.java +++ b/src/addon/java/lua/addon/luajava/CoerceLuaToJava.java @@ -65,7 +65,7 @@ public class CoerceLuaToJava { }; Coercion stringCoercion = new Coercion() { public Object coerce(LValue value) { - 
return value.luaAsString(); + return value.luaAsString().toJavaString(); } public int score(LValue value) { if ( value instanceof LUserData ) @@ -78,7 +78,7 @@ public class CoerceLuaToJava { if ( value instanceof LUserData ) return ((LUserData)value).m_instance; if ( value instanceof LString ) - return value.luaAsString(); + return value.luaAsString().toJavaString(); if ( value instanceof LInteger ) return Integer.valueOf(value.luaAsInt()); if ( value instanceof LDouble ) diff --git a/src/addon/java/lua/addon/luajava/LuaJava.java b/src/addon/java/lua/addon/luajava/LuaJava.java index fbf3f512..7e6db2a6 100644 --- a/src/addon/java/lua/addon/luajava/LuaJava.java +++ b/src/addon/java/lua/addon/luajava/LuaJava.java @@ -17,6 +17,7 @@ import java.util.Map; import lua.GlobalState; import lua.VM; import lua.value.LFunction; +import lua.value.LString; import lua.value.LTable; import lua.value.LUserData; import lua.value.LValue; @@ -121,7 +122,7 @@ public final class LuaJava extends LFunction { this.clazz = clazz; } public void luaGetTable(VM vm, LValue table, LValue key) { - final String s = key.luaAsString(); + final String s = key.luaAsString().toJavaString(); try { Field f = clazz.getField(s); Object o = f.get(m_instance); @@ -135,7 +136,7 @@ public final class LuaJava extends LFunction { } public void luaSetTable(VM vm, LValue table, LValue key, LValue val) { Class c = m_instance.getClass(); - String s = key.luaAsString(); + String s = key.luaAsString().toJavaString(); try { Field f = c.getField(s); Object v = CoerceLuaToJava.coerceArg(val, f.getType()); diff --git a/src/main/java/lua/VM.java b/src/main/java/lua/VM.java index ad7fdd16..e3c13ca0 100644 --- a/src/main/java/lua/VM.java +++ b/src/main/java/lua/VM.java @@ -1,6 +1,7 @@ package lua; import lua.io.Closure; +import lua.value.LString; import lua.value.LValue; public interface VM { @@ -114,6 +115,13 @@ public interface VM { */ public String getArgAsString( int index ); + /** + * Get the index-th argument as an 
LString value, or "" if fewer than index arguments were supplied. + * @param index + * @return + */ + public LString getArgAsLuaString( int index ); + /** Set top to base in preparation for pushing return values. * Can be used when returning no values. * diff --git a/src/main/java/lua/io/LoadState.java b/src/main/java/lua/io/LoadState.java index 6a4ee22d..df5ff83c 100644 --- a/src/main/java/lua/io/LoadState.java +++ b/src/main/java/lua/io/LoadState.java @@ -99,8 +99,7 @@ public class LoadState { return null; byte[] bytes = new byte[size]; is.readFully( bytes ); - String s = new String( bytes, 0, size-1 ); - return new LString( s ); + return new LString( bytes, 0, bytes.length - 1 ); } static LNumber longBitsToLuaNumber( long bits ) { diff --git a/src/main/java/lua/value/LBoolean.java b/src/main/java/lua/value/LBoolean.java index 6b57b027..bf4ef7b6 100644 --- a/src/main/java/lua/value/LBoolean.java +++ b/src/main/java/lua/value/LBoolean.java @@ -8,15 +8,15 @@ public final class LBoolean extends LValue { public static final LString TYPE_NAME = new LString("boolean"); - private final String m_name; + private final LString m_name; private final boolean m_value; private LBoolean( String name, boolean value ) { - this.m_name = name; + this.m_name = new LString( name ); this.m_value = value; } - public final String luaAsString() { + public final LString luaAsString() { return m_name; } diff --git a/src/main/java/lua/value/LDouble.java b/src/main/java/lua/value/LDouble.java index 4172b808..453d1272 100644 --- a/src/main/java/lua/value/LDouble.java +++ b/src/main/java/lua/value/LDouble.java @@ -14,8 +14,8 @@ public class LDouble extends LNumber { return (int) m_value; } - public String luaAsString() { - return String.valueOf(m_value); + public LString luaAsString() { + return LString.valueOf( m_value ); } public boolean isInteger() { diff --git a/src/main/java/lua/value/LFunction.java b/src/main/java/lua/value/LFunction.java index eed156bc..9aef0110 100644 --- 
a/src/main/java/lua/value/LFunction.java +++ b/src/main/java/lua/value/LFunction.java @@ -7,8 +7,8 @@ public class LFunction extends LValue { public static final LString TYPE_NAME = new LString("function"); - public String luaAsString() { - return "function: "+hashCode(); + public LString luaAsString() { + return new LString( "function: "+hashCode() ); } public void luaSetTable(VM vm, LValue table, LValue key, LValue val) { diff --git a/src/main/java/lua/value/LInteger.java b/src/main/java/lua/value/LInteger.java index b66f44c4..16c4a5ad 100644 --- a/src/main/java/lua/value/LInteger.java +++ b/src/main/java/lua/value/LInteger.java @@ -22,8 +22,8 @@ public class LInteger extends LNumber { return m_value; } - public String luaAsString() { - return String.valueOf(m_value); + public LString luaAsString() { + return LString.valueOf(m_value); } public boolean isInteger() { diff --git a/src/main/java/lua/value/LNil.java b/src/main/java/lua/value/LNil.java index b2239656..99fa3400 100644 --- a/src/main/java/lua/value/LNil.java +++ b/src/main/java/lua/value/LNil.java @@ -4,8 +4,8 @@ public final class LNil extends LValue { public static final LNil NIL = new LNil(); public static final LString TYPE_NAME = new LString("nil"); - public final String luaAsString() { - return "nil"; + public final LString luaAsString() { + return TYPE_NAME; } public boolean luaAsBoolean() { diff --git a/src/main/java/lua/value/LString.java b/src/main/java/lua/value/LString.java index 8e696f5c..d64f675f 100644 --- a/src/main/java/lua/value/LString.java +++ b/src/main/java/lua/value/LString.java @@ -1,42 +1,175 @@ package lua.value; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; + import lua.Lua; +/** + * A String implementation for Lua using bytes instead of chars. 
+ * + * This should have the following advantages: + * + * (1) We can use strings as byte buffers, as Lua does, and therefore avoid + * questions about how to adapt Lua APIs that use strings with binary data. + * + * (2) Half the memory usage when strings are primarily ASCII + * + * + * TODO: Decide if/when to copy the bytes to a new array to ensure memory does + * not "leak" in the form of unused portions of byte arrays. Currently, for + * efficiency, new LStrings and substrings never create copies. + */ public class LString extends LValue { public static final LString TYPE_NAME = new LString("string"); - final String m_string; + final byte[] m_bytes; + final int m_offset; + final int m_length; final int m_hash; private static LTable s_stringMT; + /** + * Construct a Lua string from the given Java string. Characters are encoded + * using UTF-8. + */ public LString(String string) { - this.m_string = string; - this.m_hash = string.hashCode(); + byte[] bytes; + try { + bytes = string.getBytes( "UTF-8" ); + } catch ( UnsupportedEncodingException exn ) { + bytes = stringToUtf8Bytes( string ); + } + this.m_bytes = bytes; + this.m_offset = 0; + this.m_length = m_bytes.length; + this.m_hash = hashBytes( m_bytes, 0, m_length ); } - + + /** + * Construct a string from the given byte array. + * + * new LString(b) is identical to new LString(b, 0, b.length) + */ + public LString(byte[] bytes) { + this( bytes, 0, bytes.length ); + } + + /** + * Construct a string from the given byte array and range. For efficiency, + * the byte array is not copied. Lua strings are immutable so the bytes must + * not be modified after the string is constructed. 
+ */ + public LString(byte[] bytes, int off, int len) { + if ( off < 0 || len < 0 || off+len > bytes.length ) + throw new IndexOutOfBoundsException(); + this.m_bytes = bytes; + this.m_offset = off; + this.m_length = len; + this.m_hash = hashBytes( bytes, off, len ); + } + public boolean equals(Object o) { if ( o != null && o instanceof LString ) { LString s = (LString) o; - return m_hash == s.m_hash && m_string.equals(s.m_string); + return m_hash == s.m_hash && + m_length == s.m_length && + ( ( m_bytes == s.m_bytes && m_offset == s.m_offset ) || + equals( m_bytes, m_offset, s.m_bytes, s.m_offset, m_length ) ); } return false; } - + + public int compareTo( LString o ) { + final byte[] a = this.m_bytes; + final byte[] b = o.m_bytes; + int i = this.m_offset; + int j = o.m_offset; + final int imax = i + m_length; + final int jmax = j + o.m_length; + + if ( a == b && i == j && imax == jmax ) + return 0; + + while ( i < imax && j < jmax ) { + if ( a[i] != b[j] ) { + return ( ( (int)a[i] ) & 0x0FF ) - ( ( (int)b[j] ) & 0x0FF ); + } + i++; + j++; + } + + return m_length - o.m_length; + } + public int hashCode() { return m_hash; } + public int length() { + return m_length; + } + + public LString substring( int beginIndex, int endIndex ) { + return new LString( m_bytes, m_offset + beginIndex, endIndex - beginIndex ); + } + + public static LString valueOf( double d ) { + return new LString( String.valueOf( d ) ); + } + + public static LString valueOf( int x ) { + return new LString( String.valueOf( x ) ); + } + + public static LString concat( final LString[] strings ) { + int length = 0; + for ( int i = 0; i < strings.length; ++i ) { + length += strings[i].length(); + } + byte[] bytes = new byte[length]; + + for ( int i = 0, offset = 0; i < strings.length; ++i ) { + LString s = strings[i]; + final int len = s.length(); + System.arraycopy( s.m_bytes, s.m_offset, bytes, offset, len ); + offset += len; + } + + return new LString( bytes ); + } + + /** + * Write the specified 
substring of this string to the given output stream. + */ + public void write( OutputStream os, int offset, int len ) throws IOException { + if ( offset < 0 || len < 0 ) + throw new IndexOutOfBoundsException(); + if ( offset + len > m_length ) + throw new IndexOutOfBoundsException(); + + os.write( m_bytes, m_offset+offset, len ); + } + + /** + * Copy the bytes of the string into the given byte array. + */ + public void copyInto( int strOffset, byte[] bytes, int arrayOffset, int len ) { + System.arraycopy( m_bytes, m_offset+strOffset, bytes, arrayOffset, len ); + } + public boolean luaBinCmpUnknown(int opcode, LValue lhs) { - return lhs.luaBinCmpString(opcode, m_string); + return lhs.luaBinCmpString(opcode, this); } - public boolean luaBinCmpString(int opcode, String rhs) { + public boolean luaBinCmpString(int opcode, LString rhs) { switch ( opcode ) { - case Lua.OP_EQ: return m_string.equals(rhs); - case Lua.OP_LT: return m_string.compareTo(rhs) < 0; - case Lua.OP_LE: return m_string.compareTo(rhs) <= 0; + case Lua.OP_EQ: return equals(rhs); + case Lua.OP_LT: return compareTo(rhs) < 0; + case Lua.OP_LE: return compareTo(rhs) <= 0; } luaUnsupportedOperation(); return false; @@ -64,7 +197,7 @@ public class LString extends LValue { public LValue luaToNumber( int base ) { if ( base >= 2 && base <= 36 ) { - String str = m_string.trim(); + String str = toJavaString().trim(); try { return new LInteger( Integer.parseInt( str, base ) ); } catch ( NumberFormatException nfe ) { @@ -80,13 +213,21 @@ public class LString extends LValue { return LNil.NIL; } - public String luaAsString() { - return m_string; + public LString luaAsString() { + return this; } - + + public String toJavaString() { + try { + return new String( m_bytes, m_offset, m_length, "UTF-8" ); + } catch ( UnsupportedEncodingException uee ) { + throw new RuntimeException("toJavaString: UTF-8 decoding not implemented"); + } + } + /** Built-in opcode LEN, for Strings and Tables */ public LValue luaLength() { - 
return new LInteger( m_string.length() ); + return new LInteger( length() ); } public LString luaGetType() { @@ -112,4 +253,72 @@ public class LString extends LValue { } return s_stringMT; } + + public static boolean equals( byte[] a, int i, byte[] b, int j, int n ) { + final int imax = i + n; + final int jmax = j + n; + while ( i < imax && j < jmax ) { + if ( a[i++] != b[j++] ) + return false; + } + return true; + } + + private static int hashBytes( byte[] bytes, int offset, int length ) { + // Compute the hash of the given bytes. + // This code comes right out of Lua 5.1.2 (translated from C to Java) + int h = length; /* seed */ + int step = (length>>5)+1; /* if string is too long, don't hash all its chars */ + for (int l1=length; l1>=step; l1-=step) /* compute hash */ + h = h ^ ((h<<5)+(h>>2)+(((int) bytes[offset+l1-1] ) & 0x0FF )); + return h; + } + + private static byte[] stringToUtf8Bytes( final String string ) { + final int strlen = string.length(); + byte[] bytes = new byte[ strlen ]; + byte b1 = 0, b2 = 0, b3 = 0; + + int j = 0; + for ( int i = 0; i < strlen; ++i ) { + int c = string.charAt( i ); + // TODO: combine 2-character combinations + int count; + if ( c > 0x07FF ) { + count = 3; + b3 = (byte)( 0xE0 | ( c >> 12 ) ); + b2 = (byte)( 0x80 | ( ( c >> 6 ) & 0x03F ) ); + b1 = (byte)( 0x80 | ( ( c ) & 0x03F ) ); + } else if ( c > 0x07F ) { + count = 2; + b2 = (byte)( 0xC0 | ( c >> 6 ) ); + b1 = (byte)( 0x80 | ( c & 0x03F ) ); + } else { + count = 1; + b1 = (byte) c; + } + if ( j + count > bytes.length ) { + bytes = realloc( bytes, ( j + count ) * 2 ); + } + switch ( count ) { + case 3: + bytes[j++] = b3; + case 2: + bytes[j++] = b2; + case 1: + bytes[j++] = b1; + } + } + + if ( j != bytes.length ) { + bytes = realloc( bytes, j ); + } + return bytes; + } + + private static byte[] realloc( byte[] a, int newSize ) { + final byte[] newbytes = new byte[ newSize ]; + System.arraycopy( a, 0, newbytes, 0, Math.min( newSize, a.length ) ); + return newbytes; + } } 
diff --git a/src/main/java/lua/value/LTable.java b/src/main/java/lua/value/LTable.java index 83b5e43f..7e8d3315 100644 --- a/src/main/java/lua/value/LTable.java +++ b/src/main/java/lua/value/LTable.java @@ -128,16 +128,7 @@ public class LTable extends LValue { * initializing a table. Bypasses the metatable, if any. */ public void put( String key, LValue value ) { - if (value == null || value == LNil.NIL) { - remove( key ); - return; - } - if (checkLoadFactor()) - rehash(); - int slot = findSlot( key ); - if (fillHashSlot( slot, value )) - return; - m_hashKeys[slot] = new LString( key ); + put( new LString( key ), value ); } /** @@ -267,8 +258,8 @@ public class LTable extends LValue { (LTable) metatable : null; } - public String luaAsString() { - return "table: "+id(); + public LString luaAsString() { + return new LString("table: "+id()); } public LString luaGetType() { @@ -334,13 +325,6 @@ public class LTable extends LValue { } } - private void remove( String key ) { - if ( m_hashKeys != null ) { - int slot = findSlot( key ); - clearSlot( slot ); - } - } - private void remove( LValue key ) { if ( m_hashKeys != null ) { int slot = findSlot( key ); @@ -385,20 +369,6 @@ public class LTable extends LValue { } return i; } - - private int findSlot( String key ) { - // NOTE: currently LString uses the String's hashCode. - int i = hashToIndex( key.hashCode() ); - - // This loop is guaranteed to terminate as long as we never allow the - // table to get 100% full. 
- LValue k; - while ( ( k = m_hashKeys[i] ) != null && - !k.luaBinCmpString( Lua.OP_EQ, key ) ) { - i = ( i + 1 ) % m_hashKeys.length; - } - return i; - } private int findSlot( int key ) { int i = hashToIndex( LInteger.hashCodeOf( key ) ); diff --git a/src/main/java/lua/value/LThread.java b/src/main/java/lua/value/LThread.java index 76f6f5e6..03faf58c 100644 --- a/src/main/java/lua/value/LThread.java +++ b/src/main/java/lua/value/LThread.java @@ -7,4 +7,7 @@ public class LThread extends LValue { return TYPE_NAME; } + public LString luaAsString() { + return new LString("thread: "+hashCode()); + } } diff --git a/src/main/java/lua/value/LUserData.java b/src/main/java/lua/value/LUserData.java index ab26121d..a89d347d 100644 --- a/src/main/java/lua/value/LUserData.java +++ b/src/main/java/lua/value/LUserData.java @@ -10,8 +10,8 @@ public class LUserData extends LValue { m_instance = obj; } - public String luaAsString() { - return m_instance.toString(); + public LString luaAsString() { + return new LString(m_instance.toString()); } public boolean equals(Object obj) { diff --git a/src/main/java/lua/value/LValue.java b/src/main/java/lua/value/LValue.java index c730b9d8..845f6f3d 100644 --- a/src/main/java/lua/value/LValue.java +++ b/src/main/java/lua/value/LValue.java @@ -61,7 +61,7 @@ public class LValue { } // unsupported except for strings - public boolean luaBinCmpString(int opcode, String rhs) { + public boolean luaBinCmpString(int opcode, LString rhs) { if ( opcode == Lua.OP_EQ ) return false; luaUnsupportedOperation(); @@ -122,13 +122,11 @@ public class LValue { /** Get the value as a String */ - public String luaAsString() { - return super.toString(); - } + public abstract LString luaAsString(); /** Override standard toString with lua String conversion by default */ public String toString() { - return luaAsString(); + return luaAsString().toJavaString(); } /** Return value as an integer */ diff --git a/src/test/java/lua/StandardTest.java 
b/src/test/java/lua/StandardTest.java index 1a069f10..73cf208e 100644 --- a/src/test/java/lua/StandardTest.java +++ b/src/test/java/lua/StandardTest.java @@ -93,7 +93,7 @@ public class StandardTest extends TestCase { CallInfo call = state.calls[i]; Proto p = call.closure.p; int line = p.lineinfo[call.pc]; - String func = call.closure.luaAsString(); + String func = call.closure.luaAsString().toJavaString(); stackTrace[state.cc - i] = new StackTraceElement(getName(), func, getName()+".lua", line ); } diff --git a/src/test/java/lua/value/LTableTest.java b/src/test/java/lua/value/LTableTest.java index 214daff1..ba05618a 100644 --- a/src/test/java/lua/value/LTableTest.java +++ b/src/test/java/lua/value/LTableTest.java @@ -72,7 +72,7 @@ public class LTableTest extends TestCase { assertTrue( ( intKeys & mask ) == 0 ); intKeys |= mask; } else if ( k instanceof LString ) { - final int ik = Integer.parseInt( k.luaAsString() ); - assertEquals( String.valueOf( ik ), k.luaAsString() ); + final int ik = Integer.parseInt( k.luaAsString().toJavaString() ); + assertEquals( String.valueOf( ik ), k.luaAsString().toJavaString() ); assertTrue( ik >= 0 && ik < 10 ); final int mask = 1 << ik;