From 28f349a6fad31d0664ae1f975ba68d97fe4c0399 Mon Sep 17 00:00:00 2001
From: Ian Farmer
Date: Thu, 24 Jul 2008 06:38:35 +0000
Subject: [PATCH] Add partial implementation of string.format.

Not yet implemented:
(1) '#' flag (use alternate format)
(2) Floating point formats

2 digit widths and precisions are not yet tested.
---
 src/core/org/luaj/lib/LBuffer.java   |   7 +
 src/core/org/luaj/lib/StringLib.java | 260 ++++++++++++++++++++++++++-
 src/core/org/luaj/vm/LString.java    |  77 +++++---
 src/test/errors/stringlibargs.lua    |   2 +
 src/test/res/strlib.lua              |  26 +++
 5 files changed, 338 insertions(+), 34 deletions(-)

diff --git a/src/core/org/luaj/lib/LBuffer.java b/src/core/org/luaj/lib/LBuffer.java
index 05195bbb..bcbec925 100644
--- a/src/core/org/luaj/lib/LBuffer.java
+++ b/src/core/org/luaj/lib/LBuffer.java
@@ -45,6 +45,13 @@ public class LBuffer {
 		length += alen;
 	}
 
+	public void append( String str ) {
+		final int alen = LString.lengthAsUtf8( str );
+		ensureCapacity( length + alen );
+		LString.encodeToUtf8( str, bytes, length );
+		length += alen;
+	}
+
 	public void setLength( int length ) {
 		ensureCapacity( length );
 		this.length = length;
diff --git a/src/core/org/luaj/lib/StringLib.java b/src/core/org/luaj/lib/StringLib.java
index 66975fab..7af90afb 100644
--- a/src/core/org/luaj/lib/StringLib.java
+++ b/src/core/org/luaj/lib/StringLib.java
@@ -27,10 +27,10 @@ import java.io.IOException;
 import org.luaj.compiler.DumpState;
 import org.luaj.vm.LClosure;
 import org.luaj.vm.LFunction;
-import org.luaj.vm.LNumber;
 import org.luaj.vm.LString;
 import org.luaj.vm.LTable;
 import org.luaj.vm.LValue;
+import org.luaj.vm.Lua;
 import org.luaj.vm.LuaState;
 
 
@@ -263,10 +263,247 @@ public class StringLib extends LFunction {
 	 * except as arguments to the q option.
 	 */
 	static void format( LuaState vm ) {
+		LString fmt = vm.checklstring( 2 );
+		final int n = fmt.length();
+		LBuffer result = new LBuffer(n);
+		int arg = 2;
+
+		for ( int i = 0; i < n; ) {
+			int c = fmt.luaByte( i++ );
+			if ( c != L_ESC ) {
+				result.append( (byte) c );
+			} else if ( i < n ) {
+				if ( ( c = fmt.luaByte( i ) ) == L_ESC ) {
+					++i;
+					result.append( (byte)L_ESC );
+				} else {
+					arg++;
+					FormatDesc fdsc = new FormatDesc(vm, fmt, i );
+					i += fdsc.length;
+					switch ( fdsc.conversion ) {
+					case 'c':
+						fdsc.format( result, (byte)vm.checkint( arg ) );
+						break;
+					case 'i':
+					case 'd':
+						fdsc.format( result, vm.checkint( arg ) );
+						break;
+					case 'o':
+					case 'u':
+					case 'x':
+					case 'X':
+						fdsc.format( result, vm.checklong( arg ) );
+						break;
+					case 'e':
+					case 'E':
+					case 'f':
+					case 'g':
+					case 'G':
+						fdsc.format( result, vm.checkdouble( arg ) );
+						break;
+					case 'q':
+						addquoted( result, vm.checklstring( arg ) );
+						break;
+					case 's': {
+						LString s = vm.checklstring( arg );
+						if ( fdsc.precision == -1 && s.length() >= 100 ) {
+							result.append( s );
+						} else {
+							fdsc.format( result, s );
+						}
+					}	break;
+					default:
+						vm.error("invalid option '%"+(char)fdsc.conversion+"' to 'format'");
+						break;
+					}
+				}
+			}
+		}
+
 		vm.resettop();
-		vm.pushstring( "" );
+		vm.pushlstring( result.toLuaString() );
 	}
-	
+
+	private static void addquoted(LBuffer buf, LString s) {
+		int c;
+		buf.append( (byte) '"' );
+		for ( int i = 0, n = s.length(); i < n; i++ ) {
+			switch ( c = s.luaByte( i ) ) {
+			case '"': case '\\': case '\n':
+				buf.append( (byte)'\\' );
+				buf.append( (byte)c );
+				break;
+			case '\r':
+				buf.append( "\\r" );
+				break;
+			case '\0':
+				buf.append( "\\000" );
+				break;
+			default:
+				buf.append( (byte) c );
+				break;
+			}
+		}
+		buf.append( (byte) '"' );
+	}
+
+	private static final String FLAGS = "-+ #0";
+
+	private static class FormatDesc {
+
+		private boolean leftAdjust;
+		private boolean zeroPad;
+		private boolean explicitPlus;
+		private boolean space;
+		private boolean alternateForm;
+		private static final int MAX_FLAGS = 5;
+
+		private int width;
+		private int precision;
+
+		public final int conversion;
+		public final int length;
+
+		public FormatDesc(LuaState vm, LString strfrmt, final int start) {
+			int p = start, n = strfrmt.length();
+			int c = 0;
+
+			boolean moreFlags = true;
+			while ( moreFlags ) {
+				switch ( c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ) ) {
+				case '-': leftAdjust = true; break;
+				case '+': explicitPlus = true; break;
+				case ' ': space = true; break;
+				case '#': alternateForm = true; break;
+				case '0': zeroPad = true; break;
+				default: moreFlags = false; break;
+				}
+			}
+			if ( p - start - 1 > MAX_FLAGS ) // p has also consumed the first non-flag byte; all five distinct flags are legal
+				vm.error("invalid format (repeated flags)");
+
+			width = -1;
+			if ( Character.isDigit( (char)c ) ) {
+				width = c - '0';
+				c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
+				if ( Character.isDigit( (char) c ) ) {
+					width = width * 10 + (c - '0');
+					c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
+				}
+			}
+
+			precision = -1;
+			if ( c == '.' ) {
+				c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
+				if ( Character.isDigit( (char) c ) ) {
+					precision = c - '0';
+					c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
+					if ( Character.isDigit( (char) c ) ) {
+						precision = precision * 10 + (c - '0');
+						c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
+					}
+				}
+			}
+
+			if ( Character.isDigit( (char) c ) )
+				vm.error("invalid format (width or precision too long)");
+
+			zeroPad &= !leftAdjust; // '-' overrides '0'
+			conversion = c;
+			length = p - start;
+		}
+
+		public void format(LBuffer buf, byte c) {
+			// TODO: not clear that any of width, precision, or flags apply here.
+			buf.append(c);
+		}
+
+		public void format(LBuffer buf, long number) {
+			String digits;
+
+			if ( number == 0 && precision == 0 ) {
+				digits = "";
+			} else {
+				int radix;
+				switch ( conversion ) {
+				case 'x':
+				case 'X':
+					radix = 16;
+					break;
+				case 'o':
+					radix = 8;
+					break;
+				default:
+					radix = 10;
+					break;
+				}
+				digits = Long.toString( number, radix );
+				if ( conversion == 'X' )
+					digits = digits.toUpperCase();
+			}
+
+			int minwidth = digits.length();
+			int ndigits = minwidth;
+			int nzeros;
+
+			if ( number < 0 ) {
+				ndigits--;
+			} else if ( explicitPlus || space ) {
+				minwidth++;
+			}
+
+			if ( precision > ndigits )
+				nzeros = precision - ndigits;
+			else if ( precision == -1 && zeroPad && width > minwidth )
+				nzeros = width - minwidth;
+			else
+				nzeros = 0;
+
+			minwidth += nzeros;
+			int nspaces = width > minwidth ? width - minwidth : 0;
+
+			if ( !leftAdjust )
+				pad( buf, ' ', nspaces );
+
+			if ( number < 0 ) {
+				if ( nzeros > 0 ) {
+					buf.append( (byte)'-' );
+					digits = digits.substring( 1 );
+				}
+			} else if ( explicitPlus ) {
+				buf.append( (byte)'+' );
+			} else if ( space ) {
+				buf.append( (byte)' ' );
+			}
+
+			if ( nzeros > 0 )
+				pad( buf, '0', nzeros );
+
+			buf.append( digits );
+
+			if ( leftAdjust )
+				pad( buf, ' ', nspaces );
+		}
+
+		public void format(LBuffer buf, double x) {
+			// TODO
+			buf.append( String.valueOf( x ) );
+		}
+
+		public void format(LBuffer buf, LString s) {
+			int nullindex = s.indexOf( (byte)'\0', 0 );
+			if ( nullindex != -1 )
+				s = s.substring( 0, nullindex );
+			buf.append(s);
+		}
+
+		public static final void pad(LBuffer buf, char c, int n) {
+			byte b = (byte)c;
+			while ( n-- > 0 )
+				buf.append(b);
+		}
+	}
+
 	/**
 	 * string.gmatch (s, pattern)
 	 * 
@@ -671,20 +908,27 @@ public class StringLib extends LFunction {
 		}
 
 		public void add_value( LBuffer lbuf, int soffset, int end, LValue repl ) {
-			if ( repl instanceof LString || repl instanceof LNumber ) {
+			switch ( repl.luaGetType() ) {
+			case Lua.LUA_TSTRING:
+			case Lua.LUA_TNUMBER:
 				add_s( lbuf, repl.luaAsString(), soffset, end );
 				return;
-			} else if ( repl instanceof LFunction ) {
+
+			case Lua.LUA_TFUNCTION:
 				vm.pushlvalue( repl );
 				int n = push_captures( true, soffset, end );
 				vm.call( n, 1 );
-			} else if ( repl instanceof LTable ) {
+				break;
+
+			case Lua.LUA_TTABLE:
 				// Need to call push_onecapture here for the error checking
 				push_onecapture( 0, soffset, end );
 				LValue k = vm.topointer( -1 );
 				vm.pop( 1 );
 				vm.pushlvalue( ((LTable) repl).luaGetTable( vm, k ) );
-			} else {
+				break;
+
+			default:
 				vm.error( "bad argument: string/function/table expected" );
 				return;
 			}
@@ -692,7 +936,7 @@ public class StringLib extends LFunction {
 			repl = vm.topointer( -1 );
 			if ( !repl.toJavaBoolean() ) {
 				repl = s.substring( soffset, end );
-			} else if ( ! ( repl instanceof LString || repl instanceof LNumber ) ) {
+			} else if ( ! repl.isString() ) {
 				vm.error( "invalid replacement value (a "+repl.luaGetTypeName()+")" );
 			}
 			vm.pop( 1 );
diff --git a/src/core/org/luaj/vm/LString.java b/src/core/org/luaj/vm/LString.java
index 62155a18..2e9a8644 100644
--- a/src/core/org/luaj/vm/LString.java
+++ b/src/core/org/luaj/vm/LString.java
@@ -66,38 +66,16 @@ public class LString extends LValue {
 	 * Characters are encoded using UTF-8.
 	 */
 	public LString(String string) {
-		// measure bytes required to encode
-		int n = string.length();
-		int b = n;
-		char c;
-		for ( int i=0; i<n; i++ ) {
-			if ( (c = string.charAt(i)) >= 0x80 ) {
-				++b;
-				if ( c >= 0x800 )
-					++b;
-			}
-		}
+		int b = lengthAsUtf8( string );
 
 		byte[] bytes = new byte[b];
-		int j = 0;
-		for ( int i=0; i<n; i++ ) {
-			if ( (c = string.charAt(i)) < 0x80 ) {
-				bytes[j++] = (byte) c;
-			} else if ( c < 0x800 ) {
-				bytes[j++] = (byte) (0xC0 | ((c>>6) & 0x1f));
-				bytes[j++] = (byte) (0x80 | ( c & 0x3f));
-			} else {
-				bytes[j++] = (byte) (0xE0 | ((c>>12) & 0x0f));
-				bytes[j++] = (byte) (0x80 | ((c>>6) & 0x3f));
-				bytes[j++] = (byte) (0x80 | ( c & 0x3f));
-			}
-		}
+		encodeToUtf8( string, bytes, 0 );
 		this.m_bytes = bytes;
 		this.m_offset = 0;
 		this.m_length = b;
 		this.m_hash = hashBytes( bytes, 0, b );
 	}
-	
+
 	/**
 	 * Convert to Java string using UTF-8 encoding
 	 */
@@ -157,7 +135,46 @@ public class LString extends LValue {
 	public static LString newStringNoCopy(byte[] buf, int off, int len) {
 		return new LString( buf, off, len );
 	}
-	
+
+	/**
+	 * Count the number of bytes required to encode the string as UTF-8.
+	 */
+	public static int lengthAsUtf8(String string) {
+		int n = string.length();
+		int b = n;
+		char c;
+		for ( int i=0; i<n; i++ ) {
+			if ( (c = string.charAt(i)) >= 0x80 ) {
+				++b;
+				if ( c >= 0x800 )
+					++b;
+			}
+		}
+		return b;
+	}
+
+	/**
+	 * Encode the given Java string as UTF-8 bytes, writing the result to bytes
+	 * starting at offset. The string should be measured first with lengthAsUtf8
+	 * to make sure the given byte array is large enough.
+	 */
+	public static void encodeToUtf8(String string, byte[] bytes, final int startOffset) {
+		final int n = string.length();
+		for ( int i=0, j=startOffset; i<n; i++ ) {
+			int c;
+			if ( (c = string.charAt(i)) < 0x80 ) {
+				bytes[j++] = (byte) c;
+			} else if ( c < 0x800 ) {
+				bytes[j++] = (byte) (0xC0 | ((c>>6) & 0x1f));
+				bytes[j++] = (byte) (0x80 | ( c & 0x3f));
+			} else {
+				bytes[j++] = (byte) (0xE0 | ((c>>12) & 0x0f));
+				bytes[j++] = (byte) (0x80 | ((c>>6) & 0x3f));
+				bytes[j++] = (byte) (0x80 | ( c & 0x3f));
+			}
+		}
+	}
+
 	public boolean isString() {
 		return true;
 	}
@@ -227,6 +244,14 @@ public class LString extends LValue {
 		return -1;
 	}
 
+	public int indexOf( byte b, int start ) {
+		for ( int i = m_offset + start, limit = m_offset + m_length; i < limit; ++i ) { // stop at the end of this slice, not at m_length
+			if ( m_bytes[i] == b )
+				return i - m_offset; // report the index relative to this string, like the start argument
+		}
+		return -1;
+	}
+
 	public int indexOf( LString s, int start ) {
 		final int slen = s.length();
 		final int limit = m_offset + m_length - slen;
diff --git a/src/test/errors/stringlibargs.lua b/src/test/errors/stringlibargs.lua
index 0ee44ad3..ec27c293 100644
--- a/src/test/errors/stringlibargs.lua
+++ b/src/test/errors/stringlibargs.lua
@@ -47,12 +47,14 @@ checkallerrors('string.find',{somestring,somestring,nonnumber},'bad argument #3'
 -- string.format
 local numfmts = {'%c','%d','%E','%e','%f','%g','%G','%i','%o','%u','%X','%x'}
 local strfmts = {'%q','%s'}
+local badfmts = {'%w'}
 banner('string.format')
 checkallpass('string.format',{somestring,anylua})
 checkallpass('string.format',{numfmts,somenumber})
 checkallpass('string.format',{strfmts,somestring})
 checkallerrors('string.format',{numfmts,notanumber},'bad argument #2')
 checkallerrors('string.format',{strfmts,notastring},'bad argument #2')
+checkallerrors('string.format',{badfmts,somestring},"invalid option '%w'")
 
 -- string.gmatch
 banner('string.gmatch')
diff --git a/src/test/res/strlib.lua b/src/test/res/strlib.lua
index e7aaa9b8..bfea641d 100644
--- a/src/test/res/strlib.lua
+++ b/src/test/res/strlib.lua
@@ -85,10 +85,36 @@ print(#"\0\1\2\3")
 local s = "My JaCk-O-lAnTeRn CaSe TeXt"
 print(s, string.len(s), #s)
 
+-- string.format
+print(string.format("(%.0d) (%.0d) (%.0d)", 0, -5, 9))
+print(string.format("(%.1d) (%.1d) (%.1d)", 0, -5, 9))
+print(string.format("(%.2d) (%.2d) (%.2d)", 0, -5, 9))
+
+print(string.format("(%+.0d) (%+.0d) (%+.0d)", 0, -5, 9))
+print(string.format("(%+.1d) (%+.1d) (%+.1d)", 0, -5, 9))
+print(string.format("(%+.2d) (%+.2d) (%+.2d)", 0, -5, 9))
+
+print(string.format("(%+3d) (% 3d) (%+ 3d)", 55, 55, 55))
+
+print(string.format("(%-1d) (%-1d) (%-1d)", 1, 12, -12))
+print(string.format("(%-2d) (%-2d) (%-2d)", 1, 12, -12))
+print(string.format("(%-3d) (%-3d) (%-3d)", 1, 12, -12))
+
+print(string.format("(%8x) (%8d) (%8o)", 255, 255, 255))
+print(string.format("(%08x) (%08d) (%08o)", 255, 255, 255))
+
+print(string.format("simple%ssimple", " simple "))
+
+specials = "\"specials\": %% \000 \r \n"
+print(string.format("%s\n%q\n", specials, specials))
+print(string.format("%%"))
+print(string.format("this is a %s long string", string.rep("really, ", 30)))
+
 local function pc(...)
 	local s,e = pcall(...)
 	return s and e or 'false-'..type(e)
 end
+
 local function strtests(name,func,...)
 	print(name, 'good', pc( func, ... ) )
 	print(name, 'empty', pc( func ) )