From 346e0ef465afa66c830a78bce7166766074281c3 Mon Sep 17 00:00:00 2001 From: James Roseborough Date: Fri, 23 Jul 2010 04:54:14 +0000 Subject: [PATCH] Handle source files as byte streams for lua2java conversions. --- src/jse/lua2java.java | 10 ++- src/jse/org/luaj/vm2/ast/Str.java | 33 ++++----- .../org/luaj/vm2/lua2java/JavaCodeGen.java | 72 ++++++++++--------- 3 files changed, 62 insertions(+), 53 deletions(-) diff --git a/src/jse/lua2java.java b/src/jse/lua2java.java index 226b5325..f460412f 100644 --- a/src/jse/lua2java.java +++ b/src/jse/lua2java.java @@ -47,6 +47,7 @@ public class lua2java { " -s src source directory\n" + " -d dir destination directory\n" + " -p pkg package prefix to apply to all classes\n" + + " -e enc override default character encoding\n" + " -r recursively compile all\n" + " -v verbose\n"; @@ -58,6 +59,7 @@ public class lua2java { private String srcdir = null; private String destdir = null; private String pkgprefix = null; + private String encoding = "ISO8859-1"; private boolean recurse = false; private boolean verbose = false; private List files = new ArrayList(); @@ -93,6 +95,11 @@ public class lua2java { usageExit(); pkgprefix = args[i]; break; + case 'e': + if ( ++i >= args.length ) + usageExit(); + encoding = args[i]; + break; case 'r': recurse = true; break; @@ -112,6 +119,7 @@ public class lua2java { System.out.println("srcdir: "+srcdir); System.out.println("destdir: "+destdir); System.out.println("files: "+files); + System.out.println("encoding: "+encoding); System.out.println("recurse: "+recurse); } @@ -186,7 +194,7 @@ public class lua2java { FileInputStream in = new FileInputStream(inf.infile); FileOutputStream out = new FileOutputStream(inf.outfile); PrintWriter pw = new PrintWriter(out); - LuaParser parser = new LuaParser(in); + LuaParser parser = new LuaParser(in,encoding); Chunk chunk = parser.Chunk(); new JavaCodeGen(chunk,pw,inf.javapackage,inf.javaclassname); pw.close(); diff --git a/src/jse/org/luaj/vm2/ast/Str.java b/src/jse/org/luaj/vm2/ast/Str.java index f0e5fad1..f58cef56 100644 --- a/src/jse/org/luaj/vm2/ast/Str.java +++ b/src/jse/org/luaj/vm2/ast/Str.java @@ -22,35 +22,41 @@ package org.luaj.vm2.ast; import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; import org.luaj.vm2.LuaString; public class Str { - public final byte[] bytes; - public final boolean isutf8; + private Str() {} - public Str(byte[] bytes) { - this.bytes = bytes; - this.isutf8 = true; // TODO: scan to see - } public static LuaString quoteString(String image) { String s = image.substring(1, image.length()-1); byte[] bytes = unquote(s); - // TODO: check for non-utf8 return LuaString.valueOf(bytes); } + public static LuaString charString(String image) { String s = image.substring(1, image.length()-1); byte[] bytes = unquote(s); - // TODO: check for non-utf8 return LuaString.valueOf(bytes); } + public static LuaString longString(String image) { int i = image.indexOf('[', image.indexOf('[')+1); String s = image.substring(i,image.length()-i); - return LuaString.valueOf(s); + byte[] b = iso88591bytes(s); + return LuaString.valueOf(b); } + + public static byte[] iso88591bytes( String s ) { + try { + return s.getBytes("ISO8859-1"); + } catch (UnsupportedEncodingException e) { + throw new IllegalStateException("ISO8859-1 not supported"); + } + } + public static byte[] unquote(String s) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); char[] c = s.toCharArray(); @@ -84,13 +90,4 @@ public class Str { } return baos.toByteArray(); } - /* - private static byte[] utf8decode(String s) { - try { - return s.getBytes("UTF8"); - } catch ( Exception e ) { - throw new RuntimeException("utf8 not found: "+e); - } - } - */ } diff --git a/src/jse/org/luaj/vm2/lua2java/JavaCodeGen.java b/src/jse/org/luaj/vm2/lua2java/JavaCodeGen.java index 90bdf7fb..ff35b191 100644 --- a/src/jse/org/luaj/vm2/lua2java/JavaCodeGen.java +++ b/src/jse/org/luaj/vm2/lua2java/JavaCodeGen.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.Map; import org.luaj.vm2.Lua; +import org.luaj.vm2.LuaString; import org.luaj.vm2.LuaValue; import org.luaj.vm2.ast.Block; import org.luaj.vm2.ast.Chunk; @@ -88,7 +89,7 @@ public class JavaCodeGen { JavaScope javascope = null; List constantDeclarations = new ArrayList(); - Map stringConstants = new HashMap(); + Map stringConstants = new HashMap(); Map numberConstants = new HashMap(); @@ -359,8 +360,7 @@ public class JavaCodeGen { public void visit(Constant exp) { switch ( exp.value.type() ) { case LuaValue.TSTRING: { - // TODO: non-UTF8 data - out( evalStringConstant(exp.value.tojstring()) ); + out( evalLuaStringConstant(exp.value.checkstring()) ); break; } case LuaValue.TNIL: @@ -379,11 +379,14 @@ public class JavaCodeGen { } private String evalStringConstant(String str) { - // TODO: quoting, data pooling + return evalLuaStringConstant( LuaValue.valueOf(str) ); + } + + private String evalLuaStringConstant(LuaString str) { if ( stringConstants.containsKey(str) ) return stringConstants.get(str); - String declvalue = quotedStringInitializer(str.getBytes()); - String javaname = javascope.createConstantName(str); + String declvalue = quotedStringInitializer(str); + String javaname = javascope.createConstantName(str.tojstring()); constantDeclarations.add( "static final LuaValue "+javaname+" = valueOf("+declvalue+");" ); stringConstants.put(str,javaname); return javaname; @@ -726,40 +729,41 @@ public class JavaCodeGen { } } - private static String quotedStringInitializer(byte[] bytes) { - int n = bytes.length; + private static String quotedStringInitializer(LuaString s) { + byte[] bytes = s.m_bytes; + int o = s.m_offset; + int n = s.m_length; StringBuffer sb = new StringBuffer(n+2); - // check for characters beyond ascii 128 - for ( int i=0; i0 ) sb.append(","); - byte b = bytes[j]; - switch ( b ) { - case '\n': sb.append( "'\\n'" ); break; - case '\r': sb.append( "'\\r'" ); break; - case '\t': sb.append( "'\\t'" ); break; - case '\\': sb.append( "'\\\\'" ); break; - default: - if ( b >= ' ' ) { - sb.append( '\''); - sb.append( (char) b ); - sb.append( '\''); - } else { - sb.append( String.valueOf((int)b) ); - } - break; - } - } - sb.append( "}" ); - return sb.toString(); + // check for bytes not encodable as utf8 + if ( ! s.isValidUtf8() ) { + sb.append( "new byte[]{" ); + for ( int j=0; j0 ) sb.append(","); + byte b = bytes[o+j]; + switch ( b ) { + case '\n': sb.append( "'\\n'" ); break; + case '\r': sb.append( "'\\r'" ); break; + case '\t': sb.append( "'\\t'" ); break; + case '\\': sb.append( "'\\\\'" ); break; + default: + if ( b >= ' ' ) { + sb.append( '\''); + sb.append( (char) b ); + sb.append( '\''); + } else { + sb.append( String.valueOf((int)b) ); + } + break; + } } + sb.append( "}" ); + return sb.toString(); + } sb.append('"'); for ( int i=0; i