Change LuaC to read bytes instead of chars.

This fixes some of the test cases by avoiding a lossy round-trip conversion
from bytes that are presumed to be, but might not be, UTF-8 encoded characters
into Java chars and then back to bytes. All of the compiler test cases now pass.
This commit is contained in:
Ian Farmer
2008-02-13 08:02:17 +00:00
parent 9c4bbf670f
commit 1672e73c40
6 changed files with 57 additions and 40 deletions

View File

@@ -22,7 +22,7 @@
package org.luaj.compiler; package org.luaj.compiler;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.InputStream;
import java.util.Hashtable; import java.util.Hashtable;
import org.luaj.compiler.FuncState.BlockCnt; import org.luaj.compiler.FuncState.BlockCnt;
@@ -138,11 +138,11 @@ public class LexState {
final Token lookahead = new Token(); /* look ahead token */ final Token lookahead = new Token(); /* look ahead token */
FuncState fs; /* `FuncState' is private to the parser */ FuncState fs; /* `FuncState' is private to the parser */
LuaC L; LuaC L;
Reader z; /* input stream */ InputStream z; /* input stream */
char[] buff; /* buffer for tokens */ byte[] buff; /* buffer for tokens */
int nbuff; /* length of buffer */ int nbuff; /* length of buffer */
LString source; /* current source name */ LString source; /* current source name */
char decpoint; /* locale decimal point */ byte decpoint; /* locale decimal point */
/* ORDER RESERVED */ /* ORDER RESERVED */
final static String luaX_tokens [] = { final static String luaX_tokens [] = {
@@ -197,9 +197,9 @@ public class LexState {
} }
public LexState(LuaC state, Reader reader) { public LexState(LuaC state, InputStream stream) {
this.z = reader; this.z = stream;
this.buff = new char[32]; this.buff = new byte[32];
this.L = state; this.L = state;
} }
@@ -224,7 +224,7 @@ public class LexState {
void save(int c) { void save(int c) {
if ( buff == null || nbuff + 1 > buff.length ) if ( buff == null || nbuff + 1 > buff.length )
buff = LuaC.realloc( buff, nbuff*2+1 ); buff = LuaC.realloc( buff, nbuff*2+1 );
buff[nbuff++] = (char) c; buff[nbuff++] = (byte) c;
} }
@@ -281,12 +281,12 @@ public class LexState {
lexerror( msg, t.token ); lexerror( msg, t.token );
} }
LString newstring( char[] chars, int offset, int len) { LString newstring( String s ) {
return newstring( new String(chars, offset, len) ); return L.newTString( LString.valueOf(s) );
} }
LString newstring( String s ) { LString newstring( byte[] chars, int offset, int len ) {
return L.newTString( s ); return L.newTString( LString.newStringNoCopy(chars, offset, len) );
} }
void inclinenumber() { void inclinenumber() {
@@ -299,7 +299,7 @@ public class LexState {
syntaxerror("chunk has too many lines"); syntaxerror("chunk has too many lines");
} }
void setinput( LuaC L, int firstByte, Reader z, LString source ) { void setinput( LuaC L, int firstByte, InputStream z, LString source ) {
this.decpoint = '.'; this.decpoint = '.';
this.L = L; this.L = L;
this.lookahead.token = TK_EOS; /* no look-ahead token */ this.lookahead.token = TK_EOS; /* no look-ahead token */
@@ -335,9 +335,9 @@ public class LexState {
return true; return true;
} }
void buffreplace(char from, char to) { void buffreplace(byte from, byte to) {
int n = nbuff; int n = nbuff;
char[] p = buff; byte[] p = buff;
while ((--n) >= 0) while ((--n) >= 0)
if (p[n] == from) if (p[n] == from)
p[n] = to; p[n] = to;
@@ -394,7 +394,7 @@ public class LexState {
while (isalnum(current) || current == '_') while (isalnum(current) || current == '_')
save_and_next(); save_and_next();
save('\0'); save('\0');
buffreplace('.', decpoint); /* follow locale for decimal point */ buffreplace((byte)'.', decpoint); /* follow locale for decimal point */
String str = new String(buff, 0, nbuff); String str = new String(buff, 0, nbuff);
// if (!str2d(str, seminfo)) /* format error? */ // if (!str2d(str, seminfo)) /* format error? */
// trydecpoint(str, seminfo); /* try to update decimal point separator */ // trydecpoint(str, seminfo); /* try to update decimal point separator */

View File

@@ -23,7 +23,6 @@ package org.luaj.compiler;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.Reader;
import java.util.Hashtable; import java.util.Hashtable;
import org.luaj.vm.LPrototype; import org.luaj.vm.LPrototype;
@@ -33,7 +32,6 @@ import org.luaj.vm.LoadState;
import org.luaj.vm.LocVars; import org.luaj.vm.LocVars;
import org.luaj.vm.Lua; import org.luaj.vm.Lua;
import org.luaj.vm.LuaErrorException; import org.luaj.vm.LuaErrorException;
import org.luaj.vm.Platform;
import org.luaj.vm.LoadState.LuaCompiler; import org.luaj.vm.LoadState.LuaCompiler;
@@ -154,8 +152,8 @@ public class LuaC extends Lua implements LuaCompiler {
return a; return a;
} }
static char[] realloc(char[] v, int n) { static byte[] realloc(byte[] v, int n) {
char[] a = new char[n]; byte[] a = new byte[n];
if ( v != null ) if ( v != null )
System.arraycopy(v, 0, a, 0, Math.min(v.length,n)); System.arraycopy(v, 0, a, 0, Math.min(v.length,n));
return a; return a;
@@ -186,13 +184,12 @@ public class LuaC extends Lua implements LuaCompiler {
* @throws LuaErrorException if there is a syntax error. * @throws LuaErrorException if there is a syntax error.
*/ */
public LPrototype compile(int firstByte, InputStream stream, String name) throws IOException { public LPrototype compile(int firstByte, InputStream stream, String name) throws IOException {
Reader r = Platform.getInstance().createReader( stream );
LuaC compiler = new LuaC(); LuaC compiler = new LuaC();
return compiler.luaY_parser(firstByte, r, name); return compiler.luaY_parser(firstByte, stream, name);
} }
/** Parse the input */ /** Parse the input */
private LPrototype luaY_parser(int firstByte, Reader z, String name) { private LPrototype luaY_parser(int firstByte, InputStream z, String name) {
LexState lexstate = new LexState(this, z); LexState lexstate = new LexState(this, z);
FuncState funcstate = new FuncState(); FuncState funcstate = new FuncState();
// lexstate.buff = buff; // lexstate.buff = buff;
@@ -213,13 +210,15 @@ public class LuaC extends Lua implements LuaCompiler {
} }
public LString newlstr(char[] chars, int offset, int len) { public LString newlstr(char[] chars, int offset, int len) {
return newTString( new String(chars,offset,len) ); return newTString( LString.valueOf( new String(chars,offset,len) ) );
} }
public LString newTString(String s) { public LString newTString(LString s) {
LString t = (LString) strings.get(s); LString t = (LString) strings.get(s);
if ( t == null ) if ( t == null ) {
strings.put( s, t = new LString(s) ); t = LString.newStringCopy(s);
strings.put( t, t );
}
return t; return t;
} }

View File

@@ -149,6 +149,20 @@ public class LString extends LValue {
this.m_hash = hashBytes( bytes, off, len ); this.m_hash = hashBytes( bytes, off, len );
} }
public static LString newStringCopy(LString src) {
return newStringCopy( src.m_bytes, src.m_offset, src.m_length );
}
public static LString newStringCopy(byte[] buf, int off, int len) {
byte[] b = new byte[len];
System.arraycopy( buf, off, b, 0, len );
return new LString( b, 0, len );
}
public static LString newStringNoCopy(byte[] buf, int off, int len) {
return new LString( buf, off, len );
}
public boolean equals(Object o) { public boolean equals(Object o) {
if ( o != null && o instanceof LString ) { if ( o != null && o instanceof LString ) {
LString s = (LString) o; LString s = (LString) o;

View File

@@ -65,13 +65,15 @@ public class Print extends Lua {
}; };
static void printString(String s) { static void printString(final LString s) {
char[] chars = s.toCharArray(); final byte[] bytes = s.m_bytes;
final int off = s.m_offset;
ps.print('"'); ps.print('"');
for (int i = 0, n = chars.length; i < n; i++) { for (int i = 0, n = s.m_length; i < n; i++) {
char c = chars[i]; int c = bytes[i+off] & 0x0FF;
if ( c >= ' ' && c <= '~' && c != '\"' && c != '\\' ) if ( c >= ' ' && c <= '~' && c != '\"' && c != '\\' )
ps.print(c); ps.print((char) c);
else { else {
switch (c) { switch (c) {
case '"': case '"':
@@ -99,9 +101,8 @@ public class Print extends Lua {
ps.print("\\v"); ps.print("\\v");
break; break;
default: default:
ps.print("\\u"); ps.print('\\');
ps.print(Integer.toHexString(0x10000 + (int) c) ps.print(Integer.toString(1000 + c).substring(1));
.substring(1));
break; break;
} }
} }
@@ -111,7 +112,7 @@ public class Print extends Lua {
static void printValue( LValue v ) { static void printValue( LValue v ) {
if ( v instanceof LString ) if ( v instanceof LString )
printString(v.toString()); printString( v.luaAsString() );
else if ( v instanceof LInteger ) { else if ( v instanceof LInteger ) {
ps.print( v.toJavaInt() ); ps.print( v.toJavaInt() );
} else if ( v instanceof LDouble ) { } else if ( v instanceof LDouble ) {
@@ -180,7 +181,7 @@ public class Print extends Lua {
} }
switch (o) { switch (o) {
case OP_LOADK: case OP_LOADK:
printString(" ; "); ps.print(" ; ");
printConstant(f, bx); printConstant(f, bx);
break; break;
case OP_GETUPVAL: case OP_GETUPVAL:

View File

@@ -179,6 +179,7 @@ public class LuaJTest extends TestCase {
// load the file // load the file
LPrototype p = loadScriptResource( state, testName ); LPrototype p = loadScriptResource( state, testName );
p.source = LString.valueOf("stdin");
// Replace System.out with a ByteArrayOutputStream // Replace System.out with a ByteArrayOutputStream
ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
@@ -208,7 +209,9 @@ public class LuaJTest extends TestCase {
} }
try { try {
return LoadState.undump(state, script, name); // Use "stdin" instead of resource name so that output matches
// standard Lua.
return LoadState.undump(state, script, "stdin");
} finally { } finally {
script.close(); script.close();
} }

View File

@@ -1 +1 @@
version: 0.20 version: 0.21