Change LString to use an array of bytes instead of wrapping Java's String.
This brings our implementation more in line with regular C Lua.
This commit is contained in:
@@ -1,42 +1,175 @@
|
||||
package lua.value;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
import lua.Lua;
|
||||
|
||||
/**
|
||||
* A String implementation for Lua using bytes instead of chars.
|
||||
*
|
||||
* This should have the following advantages:
|
||||
*
|
||||
* (1) We can use strings as byte buffers, as Lua does, and therefore avoid
|
||||
* questions about how to adapt Lua APIs that use strings with binary data.
|
||||
*
|
||||
* (2) Half the memory usage when strings are primarily ASCII
|
||||
*
|
||||
*
|
||||
* TODO: Decide if/when to copy the bytes to a new array to ensure memory does
|
||||
* not "leak" in the form of unused portions of byte arrays. Currently, for
|
||||
* efficiency, new LStrings and substrings never create copies.
|
||||
*/
|
||||
public class LString extends LValue {
|
||||
|
||||
public static final LString TYPE_NAME = new LString("string");
|
||||
|
||||
final String m_string;
|
||||
final byte[] m_bytes;
|
||||
final int m_offset;
|
||||
final int m_length;
|
||||
final int m_hash;
|
||||
|
||||
private static LTable s_stringMT;
|
||||
|
||||
/**
|
||||
* Construct a Lua string from the given Java string. Characters are encoded
|
||||
* using UTF-8.
|
||||
*/
|
||||
public LString(String string) {
|
||||
this.m_string = string;
|
||||
this.m_hash = string.hashCode();
|
||||
byte[] bytes;
|
||||
try {
|
||||
bytes = string.getBytes( "UTF-8" );
|
||||
} catch ( UnsupportedEncodingException exn ) {
|
||||
bytes = stringToUtf8Bytes( string );
|
||||
}
|
||||
this.m_bytes = bytes;
|
||||
this.m_offset = 0;
|
||||
this.m_length = m_bytes.length;
|
||||
this.m_hash = hashBytes( m_bytes, 0, m_length );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Construct a string from the given byte array.
|
||||
*
|
||||
* new LString(b) is identical to new LString(b, 0, b.length)
|
||||
*/
|
||||
public LString(byte[] bytes) {
|
||||
this( bytes, 0, bytes.length );
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a string from the given byte array and range. For efficiency,
|
||||
* the byte array is not copied. Lua strings are immutable so the bytes must
|
||||
* not be modified after the string is constructed.
|
||||
*/
|
||||
public LString(byte[] bytes, int off, int len) {
|
||||
if ( off < 0 || len < 0 || off+len > bytes.length )
|
||||
throw new IndexOutOfBoundsException();
|
||||
this.m_bytes = bytes;
|
||||
this.m_offset = off;
|
||||
this.m_length = len;
|
||||
this.m_hash = hashBytes( bytes, off, len );
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( o != null && o instanceof LString ) {
|
||||
LString s = (LString) o;
|
||||
return m_hash == s.m_hash && m_string.equals(s.m_string);
|
||||
return m_hash == s.m_hash &&
|
||||
m_length == s.m_length &&
|
||||
( ( m_bytes == s.m_bytes && m_offset == s.m_offset ) ||
|
||||
equals( m_bytes, m_offset, s.m_bytes, s.m_offset, m_length ) );
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public int compareTo( LString o ) {
|
||||
final byte[] a = this.m_bytes;
|
||||
final byte[] b = o.m_bytes;
|
||||
int i = this.m_offset;
|
||||
int j = o.m_offset;
|
||||
final int imax = i + m_length;
|
||||
final int jmax = j + o.m_length;
|
||||
|
||||
if ( a == b && i == j && imax == jmax )
|
||||
return 0;
|
||||
|
||||
while ( i < imax && j < jmax ) {
|
||||
if ( a[i] != b[i] ) {
|
||||
return ( ( (int)a[i] ) & 0x0FF ) - ( ( (int)b[j] ) & 0x0FF );
|
||||
}
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
|
||||
return m_length - o.m_length;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return m_hash;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return m_length;
|
||||
}
|
||||
|
||||
public LString substring( int beginIndex, int endIndex ) {
|
||||
return new LString( m_bytes, m_offset + beginIndex, endIndex - beginIndex );
|
||||
}
|
||||
|
||||
public static LString valueOf( double d ) {
|
||||
return new LString( String.valueOf( d ) );
|
||||
}
|
||||
|
||||
public static LString valueOf( int x ) {
|
||||
return new LString( String.valueOf( x ) );
|
||||
}
|
||||
|
||||
public static LString concat( final LString[] strings ) {
|
||||
int length = 0;
|
||||
for ( int i = 0; i < strings.length; ++i ) {
|
||||
length += strings[i].length();
|
||||
}
|
||||
byte[] bytes = new byte[length];
|
||||
|
||||
for ( int i = 0, offset = 0; i < strings.length; ++i ) {
|
||||
LString s = strings[i];
|
||||
final int len = s.length();
|
||||
System.arraycopy( s.m_bytes, s.m_offset, bytes, offset, len );
|
||||
offset += len;
|
||||
}
|
||||
|
||||
return new LString( bytes );
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the specified substring of this string to the given output stream.
|
||||
*/
|
||||
public void write( OutputStream os, int offset, int len ) throws IOException {
|
||||
if ( offset < 0 || len < 0 )
|
||||
throw new IndexOutOfBoundsException();
|
||||
if ( offset + len > m_length )
|
||||
throw new IndexOutOfBoundsException();
|
||||
|
||||
os.write( m_bytes, m_offset+offset, len );
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy the bytes of the string into the given byte array.
|
||||
*/
|
||||
public void copyInto( int strOffset, byte[] bytes, int arrayOffset, int len ) {
|
||||
System.arraycopy( m_bytes, m_offset+strOffset, bytes, arrayOffset, len );
|
||||
}
|
||||
|
||||
public boolean luaBinCmpUnknown(int opcode, LValue lhs) {
|
||||
return lhs.luaBinCmpString(opcode, m_string);
|
||||
return lhs.luaBinCmpString(opcode, this);
|
||||
}
|
||||
|
||||
public boolean luaBinCmpString(int opcode, String rhs) {
|
||||
public boolean luaBinCmpString(int opcode, LString rhs) {
|
||||
switch ( opcode ) {
|
||||
case Lua.OP_EQ: return m_string.equals(rhs);
|
||||
case Lua.OP_LT: return m_string.compareTo(rhs) < 0;
|
||||
case Lua.OP_LE: return m_string.compareTo(rhs) <= 0;
|
||||
case Lua.OP_EQ: return equals(rhs);
|
||||
case Lua.OP_LT: return compareTo(rhs) < 0;
|
||||
case Lua.OP_LE: return compareTo(rhs) <= 0;
|
||||
}
|
||||
luaUnsupportedOperation();
|
||||
return false;
|
||||
@@ -64,7 +197,7 @@ public class LString extends LValue {
|
||||
|
||||
public LValue luaToNumber( int base ) {
|
||||
if ( base >= 2 && base <= 36 ) {
|
||||
String str = m_string.trim();
|
||||
String str = toJavaString().trim();
|
||||
try {
|
||||
return new LInteger( Integer.parseInt( str, base ) );
|
||||
} catch ( NumberFormatException nfe ) {
|
||||
@@ -80,13 +213,21 @@ public class LString extends LValue {
|
||||
return LNil.NIL;
|
||||
}
|
||||
|
||||
public String luaAsString() {
|
||||
return m_string;
|
||||
public LString luaAsString() {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public String toJavaString() {
|
||||
try {
|
||||
return new String( m_bytes, m_offset, m_length, "UTF-8" );
|
||||
} catch ( UnsupportedEncodingException uee ) {
|
||||
throw new RuntimeException("toJavaString: UTF-8 decoding not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
/** Built-in opcode LEN, for Strings and Tables */
|
||||
public LValue luaLength() {
|
||||
return new LInteger( m_string.length() );
|
||||
return new LInteger( length() );
|
||||
}
|
||||
|
||||
public LString luaGetType() {
|
||||
@@ -112,4 +253,72 @@ public class LString extends LValue {
|
||||
}
|
||||
return s_stringMT;
|
||||
}
|
||||
|
||||
public static boolean equals( byte[] a, int i, byte[] b, int j, int n ) {
|
||||
final int imax = i + n;
|
||||
final int jmax = j + n;
|
||||
while ( i < imax && j < jmax ) {
|
||||
if ( a[i++] != b[j++] )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static int hashBytes( byte[] bytes, int offset, int length ) {
|
||||
// Compute the hash of the given bytes.
|
||||
// This code comes right out of Lua 5.1.2 (translated from C to Java)
|
||||
int h = length; /* seed */
|
||||
int step = (length>>5)+1; /* if string is too long, don't hash all its chars */
|
||||
for (int l1=length; l1>=step; l1-=step) /* compute hash */
|
||||
h = h ^ ((h<<5)+(h>>2)+(((int) bytes[offset+l1-1] ) & 0x0FF ));
|
||||
return h;
|
||||
}
|
||||
|
||||
private static byte[] stringToUtf8Bytes( final String string ) {
|
||||
final int strlen = string.length();
|
||||
byte[] bytes = new byte[ strlen ];
|
||||
byte b1 = 0, b2 = 0, b3 = 0;
|
||||
|
||||
int j = 0;
|
||||
for ( int i = 0; i < strlen; ++i ) {
|
||||
int c = string.charAt( i );
|
||||
// TODO: combine 2-character combinations
|
||||
int count;
|
||||
if ( c > 0x07FF ) {
|
||||
count = 3;
|
||||
b3 = (byte)( 0xE0 | ( c >> 12 ) );
|
||||
b2 = (byte)( 0x80 | ( ( c >> 6 ) & 0x03F ) );
|
||||
b1 = (byte)( 0x80 | ( ( c ) & 0x03F ) );
|
||||
} else if ( c > 0x07F ) {
|
||||
count = 2;
|
||||
b2 = (byte)( 0xC0 | ( c >> 6 ) );
|
||||
b1 = (byte)( 0x80 | ( c & 0x03F ) );
|
||||
} else {
|
||||
count = 1;
|
||||
b1 = (byte) c;
|
||||
}
|
||||
if ( j + count > bytes.length ) {
|
||||
bytes = realloc( bytes, ( j + count ) * 2 );
|
||||
}
|
||||
switch ( count ) {
|
||||
case 3:
|
||||
bytes[j++] = b3;
|
||||
case 2:
|
||||
bytes[j++] = b2;
|
||||
case 1:
|
||||
bytes[j++] = b1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( j != bytes.length ) {
|
||||
bytes = realloc( bytes, j );
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
private static byte[] realloc( byte[] a, int newSize ) {
|
||||
final byte[] newbytes = new byte[ newSize ];
|
||||
System.arraycopy( a, 0, newbytes, 0, Math.min( newSize, a.length ) );
|
||||
return newbytes;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user