Add partial implementation of string.format.

Not yet implemented:
(1) '#' flag (use alternate format)
(2) Floating point formats

Two-digit widths and precisions are not yet tested.
This commit is contained in:
Ian Farmer
2008-07-24 06:38:35 +00:00
parent 7f1c388919
commit 28f349a6fa
5 changed files with 338 additions and 34 deletions

View File

@@ -45,6 +45,13 @@ public class LBuffer {
length += alen;
}
/**
 * Append a Java string to this buffer, encoded as UTF-8 bytes.
 *
 * @param str the string to encode and append
 */
public void append( String str ) {
    // Measure first so the backing array can be grown before encoding into it.
    final int utf8Size = LString.lengthAsUtf8( str );
    final int requiredCapacity = length + utf8Size;
    ensureCapacity( requiredCapacity );
    LString.encodeToUtf8( str, bytes, length );
    length += utf8Size;
}
public void setLength( int length ) {
ensureCapacity( length );
this.length = length;

View File

@@ -27,10 +27,10 @@ import java.io.IOException;
import org.luaj.compiler.DumpState;
import org.luaj.vm.LClosure;
import org.luaj.vm.LFunction;
import org.luaj.vm.LNumber;
import org.luaj.vm.LString;
import org.luaj.vm.LTable;
import org.luaj.vm.LValue;
import org.luaj.vm.Lua;
import org.luaj.vm.LuaState;
@@ -263,8 +263,245 @@ public class StringLib extends LFunction {
* except as arguments to the q option.
*/
static void format( LuaState vm ) {
    // The format string is read from stack slot 2; the values to format follow it.
    LString fmt = vm.checklstring( 2 );
    final int n = fmt.length();
    LBuffer result = new LBuffer(n);
    // Pre-incremented once per directive, so the first value is read from slot 3.
    int arg = 2;
    for ( int i = 0; i < n; ) {
        int c = fmt.luaByte( i++ );
        if ( c != L_ESC ) {
            // Ordinary byte: copied through unchanged.
            result.append( (byte) c );
        } else if ( i < n ) {
            if ( ( c = fmt.luaByte( i ) ) == L_ESC ) {
                // "%%" produces a single literal escape character.
                ++i;
                result.append( (byte)L_ESC );
            } else {
                arg++;
                // Parse flags, width, precision and the conversion character.
                FormatDesc fdsc = new FormatDesc(vm, fmt, i );
                i += fdsc.length;
                switch ( fdsc.conversion ) {
                case 'c':
                    fdsc.format( result, (byte)vm.checkint( arg ) );
                    break;
                case 'i':
                case 'd':
                    fdsc.format( result, vm.checkint( arg ) );
                    break;
                case 'o':
                case 'u':
                case 'x':
                case 'X':
                    fdsc.format( result, vm.checklong( arg ) );
                    break;
                case 'e':
                case 'E':
                case 'f':
                case 'g':
                case 'G':
                    fdsc.format( result, vm.checkdouble( arg ) );
                    break;
                case 'q':
                    addquoted( result, vm.checklstring( arg ) );
                    break;
                case 's': {
                    LString s = vm.checklstring( arg );
                    if ( fdsc.precision == -1 && s.length() >= 100 ) {
                        // No precision and a long string: append it verbatim.
                        result.append( s );
                    } else {
                        fdsc.format( result, s );
                    }
                }    break;
                default:
                    vm.error("invalid option '%"+(char)fdsc.conversion+"' to 'format'");
                    break;
                }
            }
        }
        // NOTE(review): a lone escape character at the very end of the format
        // string is silently dropped here; confirm whether it should raise an
        // error like other invalid directives.
    }
    // Leave exactly one value for the caller: the formatted string. The stray
    // empty string that used to be pushed ahead of it has been removed.
    vm.resettop();
    vm.pushlstring( result.toLuaString() );
}
/**
 * Append s to buf wrapped in double quotes, escaping the bytes that would
 * otherwise break the quoted form: double quote, backslash and newline get
 * a backslash prefix, carriage return becomes the two characters \r, and
 * a zero byte becomes \000.
 */
private static void addquoted(LBuffer buf, LString s) {
    final byte quote = (byte) '"';
    final byte backslash = (byte) '\\';
    final int len = s.length();

    buf.append( quote );
    for ( int pos = 0; pos < len; pos++ ) {
        final int ch = s.luaByte( pos );
        if ( ch == '"' || ch == '\\' || ch == '\n' ) {
            buf.append( backslash );
            buf.append( (byte) ch );
        } else if ( ch == '\r' ) {
            buf.append( "\\r" );
        } else if ( ch == '\0' ) {
            buf.append( "\\000" );
        } else {
            buf.append( (byte) ch );
        }
    }
    buf.append( quote );
}
// The flag characters that may follow '%' in a format directive:
// '-', '+', ' ', '#' and '0'.
private static final String FLAGS = "-+ #0";
/**
 * Parsed form of one format directive (the part after '%'): optional flags,
 * an optional width of up to two digits, an optional precision of up to two
 * digits, and the conversion character. Also knows how to render a value
 * according to that description.
 */
private static class FormatDesc {

    private boolean leftAdjust;     // '-' flag
    private boolean zeroPad;        // '0' flag (cleared when '-' is also given)
    private boolean explicitPlus;   // '+' flag
    private boolean space;          // ' ' flag
    private boolean alternateForm;  // '#' flag; recorded but not yet acted upon
    private static final int MAX_FLAGS = 5;

    private int width;      // -1 when no width was given
    private int precision;  // -1 when no precision was given

    /** The conversion character, or 0 if the format string ended first. */
    public final int conversion;
    /** Number of bytes of the format string consumed by this directive. */
    public final int length;

    /**
     * Parse a directive.
     *
     * @param vm      used to report malformed directives via vm.error
     * @param strfrmt the complete format string
     * @param start   index of the first byte after the '%'
     */
    public FormatDesc(LuaState vm, LString strfrmt, final int start) {
        int p = start, n = strfrmt.length();
        int c = 0;

        // Count flag characters directly. Using (p - start) would also count
        // the first non-flag character consumed by the loop, which rejected
        // directives carrying all five distinct flags.
        int nflags = 0;
        boolean moreFlags = true;
        while ( moreFlags ) {
            switch ( c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ) ) {
            case '-': leftAdjust = true; break;
            case '+': explicitPlus = true; break;
            case ' ': space = true; break;
            case '#': alternateForm = true; break;
            case '0': zeroPad = true; break;
            default: moreFlags = false; break;
            }
            if ( moreFlags )
                ++nflags;
        }
        if ( nflags > MAX_FLAGS )
            vm.error("invalid format (repeated flags)");

        // Width: at most two digits.
        width = -1;
        if ( Character.isDigit( (char)c ) ) {
            width = c - '0';
            c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
            if ( Character.isDigit( (char) c ) ) {
                width = width * 10 + (c - '0');
                c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
            }
        }

        // Precision: '.' followed by at most two digits.
        precision = -1;
        if ( c == '.' ) {
            // As in C printf, a '.' with no digits means precision 0.
            precision = 0;
            c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
            if ( Character.isDigit( (char) c ) ) {
                precision = c - '0';
                c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
                if ( Character.isDigit( (char) c ) ) {
                    precision = precision * 10 + (c - '0');
                    c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 );
                }
            }
        }

        if ( Character.isDigit( (char) c ) )
            vm.error("invalid format (width or precision too long)");

        zeroPad &= !leftAdjust; // '-' overrides '0'
        conversion = c;
        length = p - start;
    }

    /**
     * Format a single byte (the 'c' conversion), space-padded to the
     * requested width on the left, or on the right when '-' was given.
     */
    public void format(LBuffer buf, byte c) {
        int nspaces = width > 1 ? width - 1 : 0;
        if ( !leftAdjust )
            pad( buf, ' ', nspaces );
        buf.append(c);
        if ( leftAdjust )
            pad( buf, ' ', nspaces );
    }

    /**
     * Format an integer in the radix implied by the conversion character
     * (16 for 'x'/'X', 8 for 'o', otherwise 10), honouring sign flags,
     * precision, zero padding and width.
     */
    public void format(LBuffer buf, long number) {
        String digits;

        if ( number == 0 && precision == 0 ) {
            // Zero with an explicit precision of 0 prints no digits at all.
            digits = "";
        } else {
            int radix;
            switch ( conversion ) {
            case 'x':
            case 'X':
                radix = 16;
                break;
            case 'o':
                radix = 8;
                break;
            default:
                radix = 10;
                break;
            }
            // NOTE(review): negative values come out with a leading '-' in
            // every radix; confirm whether o/u/x/X should instead print the
            // unsigned two's-complement form.
            digits = Long.toString( number, radix );
            if ( conversion == 'X' )
                digits = digits.toUpperCase();
        }

        // minwidth counts every character emitted before space padding;
        // ndigits counts only the digits (the '-' sign is excluded).
        int minwidth = digits.length();
        int ndigits = minwidth;
        int nzeros;

        if ( number < 0 ) {
            ndigits--;
        } else if ( explicitPlus || space ) {
            minwidth++;
        }

        if ( precision > ndigits )
            nzeros = precision - ndigits;
        else if ( precision == -1 && zeroPad && width > minwidth )
            nzeros = width - minwidth;
        else
            nzeros = 0;

        minwidth += nzeros;
        int nspaces = width > minwidth ? width - minwidth : 0;

        if ( !leftAdjust )
            pad( buf, ' ', nspaces );

        if ( number < 0 ) {
            if ( nzeros > 0 ) {
                // The sign must precede the zero fill, so emit it separately
                // and strip it from the digit string.
                buf.append( (byte)'-' );
                digits = digits.substring( 1 );
            }
        } else if ( explicitPlus ) {
            buf.append( (byte)'+' );
        } else if ( space ) {
            buf.append( (byte)' ' );
        }

        if ( nzeros > 0 )
            pad( buf, '0', nzeros );

        buf.append( digits );

        if ( leftAdjust )
            pad( buf, ' ', nspaces );
    }

    /**
     * Format a floating point value. Floating point formats are not yet
     * implemented; flags, width and precision are ignored.
     */
    public void format(LBuffer buf, double x) {
        // TODO
        buf.append( String.valueOf( x ) );
    }

    /**
     * Format a string (the 's' conversion). The string is cut at its first
     * zero byte, then truncated to the precision if one was given, then
     * space-padded to the width on the left, or on the right when '-' was
     * given.
     */
    public void format(LBuffer buf, LString s) {
        int nullindex = s.indexOf( (byte)'\0', 0 );
        if ( nullindex != -1 )
            s = s.substring( 0, nullindex );
        if ( precision >= 0 && s.length() > precision )
            s = s.substring( 0, precision );

        int slen = s.length();
        int nspaces = width > slen ? width - slen : 0;

        if ( !leftAdjust )
            pad( buf, ' ', nspaces );
        buf.append(s);
        if ( leftAdjust )
            pad( buf, ' ', nspaces );
    }

    /** Append n copies of the character c (as a single byte) to buf. */
    public static final void pad(LBuffer buf, char c, int n) {
        byte b = (byte)c;
        while ( n-- > 0 )
            buf.append(b);
    }
}
/**
@@ -671,20 +908,27 @@ public class StringLib extends LFunction {
}
public void add_value( LBuffer lbuf, int soffset, int end, LValue repl ) {
if ( repl instanceof LString || repl instanceof LNumber ) {
switch ( repl.luaGetType() ) {
case Lua.LUA_TSTRING:
case Lua.LUA_TNUMBER:
add_s( lbuf, repl.luaAsString(), soffset, end );
return;
} else if ( repl instanceof LFunction ) {
case Lua.LUA_TFUNCTION:
vm.pushlvalue( repl );
int n = push_captures( true, soffset, end );
vm.call( n, 1 );
} else if ( repl instanceof LTable ) {
break;
case Lua.LUA_TTABLE:
// Need to call push_onecapture here for the error checking
push_onecapture( 0, soffset, end );
LValue k = vm.topointer( -1 );
vm.pop( 1 );
vm.pushlvalue( ((LTable) repl).luaGetTable( vm, k ) );
} else {
break;
default:
vm.error( "bad argument: string/function/table expected" );
return;
}
@@ -692,7 +936,7 @@ public class StringLib extends LFunction {
repl = vm.topointer( -1 );
if ( !repl.toJavaBoolean() ) {
repl = s.substring( soffset, end );
} else if ( ! ( repl instanceof LString || repl instanceof LNumber ) ) {
} else if ( ! repl.isString() ) {
vm.error( "invalid replacement value (a "+repl.luaGetTypeName()+")" );
}
vm.pop( 1 );

View File

@@ -66,32 +66,10 @@ public class LString extends LValue {
* Characters are encoded using UTF-8.
*/
public LString(String string) {
// measure bytes required to encode
int n = string.length();
int b = n;
char c;
for ( int i=0; i<n; i++ ) {
if ( (c = string.charAt(i)) >= 0x80 ) {
++b;
if ( c >= 0x800 )
++b;
}
}
int b = lengthAsUtf8( string );
byte[] bytes = new byte[b];
int j = 0;
for ( int i=0; i<n; i++ ) {
if ( (c = string.charAt(i)) < 0x80 ) {
bytes[j++] = (byte) c;
} else if ( c < 0x800 ) {
bytes[j++] = (byte) (0xC0 | ((c>>6) & 0x1f));
bytes[j++] = (byte) (0x80 | ( c & 0x3f));
} else {
bytes[j++] = (byte) (0xE0 | ((c>>12) & 0x0f));
bytes[j++] = (byte) (0x80 | ((c>>6) & 0x3f));
bytes[j++] = (byte) (0x80 | ( c & 0x3f));
}
}
encodeToUtf8( string, bytes, 0 );
this.m_bytes = bytes;
this.m_offset = 0;
this.m_length = b;
@@ -158,6 +136,45 @@ public class LString extends LValue {
return new LString( buf, off, len );
}
/**
* Count the number of bytes required to encode the string as UTF-8.
*/
public static int lengthAsUtf8(String string) {
    final int nchars = string.length();
    int nbytes = 0;
    for ( int i = 0; i < nchars; i++ ) {
        final char ch = string.charAt( i );
        // One byte below 0x80, two below 0x800, three for everything else.
        if ( ch < 0x80 )
            nbytes += 1;
        else if ( ch < 0x800 )
            nbytes += 2;
        else
            nbytes += 3;
    }
    return nbytes;
}
/**
* Encode the given Java string as UTF-8 bytes, writing the result to bytes
* starting at offset. The string should be measured first with lengthAsUtf8
* to make sure the given byte array is large enough.
*/
public static void encodeToUtf8(String string, byte[] bytes, final int startOffset) {
    final int nchars = string.length();
    int dst = startOffset;
    for ( int src = 0; src < nchars; src++ ) {
        final int ch = string.charAt( src );
        if ( ch >= 0x800 ) {
            // Three-byte form: 4 + 6 + 6 payload bits.
            bytes[dst]     = (byte) (0xE0 | ((ch >> 12) & 0x0f));
            bytes[dst + 1] = (byte) (0x80 | ((ch >> 6) & 0x3f));
            bytes[dst + 2] = (byte) (0x80 | (ch & 0x3f));
            dst += 3;
        } else if ( ch >= 0x80 ) {
            // Two-byte form: 5 + 6 payload bits.
            bytes[dst]     = (byte) (0xC0 | ((ch >> 6) & 0x1f));
            bytes[dst + 1] = (byte) (0x80 | (ch & 0x3f));
            dst += 2;
        } else {
            // Single byte: values below 0x80 are stored as-is.
            bytes[dst] = (byte) ch;
            dst += 1;
        }
    }
}
/** Always true: every LString reports itself as a string. */
public boolean isString() {
return true;
}
@@ -227,6 +244,14 @@ public class LString extends LValue {
return -1;
}
/**
 * Find the first occurrence of a byte in this string.
 *
 * @param b     the byte to look for
 * @param start index within this string (0-based) at which to begin searching
 * @return the index of the first match relative to the start of this string,
 *         or -1 if the byte does not occur at or after start
 */
public int indexOf( byte b, int start ) {
    // This string occupies m_bytes[m_offset .. m_offset + m_length), so both
    // the loop bound and the returned index must account for m_offset.
    final int end = m_offset + m_length;
    for ( int i = m_offset + start; i < end; ++i ) {
        if ( m_bytes[i] == b )
            return i - m_offset;
    }
    return -1;
}
public int indexOf( LString s, int start ) {
final int slen = s.length();
final int limit = m_offset + m_length - slen;

View File

@@ -47,12 +47,14 @@ checkallerrors('string.find',{somestring,somestring,nonnumber},'bad argument #3'
-- string.format
-- Format directives grouped by the kind of argument they are paired with below:
-- numeric conversions, string conversions, and an unsupported conversion.
local numfmts = {'%c','%d','%E','%e','%f','%g','%G','%i','%o','%u','%X','%x'}
local strfmts = {'%q','%s'}
local badfmts = {'%w'}
banner('string.format')
-- NOTE(review): checkallpass/checkallerrors are defined outside this chunk;
-- presumably they call the named function with each combination of the listed
-- argument sets and check for success or for the given error text -- confirm.
checkallpass('string.format',{somestring,anylua})
checkallpass('string.format',{numfmts,somenumber})
checkallpass('string.format',{strfmts,somestring})
checkallerrors('string.format',{numfmts,notanumber},'bad argument #2')
checkallerrors('string.format',{strfmts,notastring},'bad argument #2')
-- An unknown conversion character is expected to be reported by name.
checkallerrors('string.format',{badfmts,somestring},"invalid option '%w'")
-- string.gmatch
banner('string.gmatch')

View File

@@ -85,10 +85,36 @@ print(#"\0\1\2\3")
local s = "My JaCk-O-lAnTeRn CaSe TeXt"
print(s, string.len(s), #s)
-- string.format
-- Integer precision 0, 1 and 2 applied to zero, a negative and a positive value.
print(string.format("(%.0d) (%.0d) (%.0d)", 0, -5, 9))
print(string.format("(%.1d) (%.1d) (%.1d)", 0, -5, 9))
print(string.format("(%.2d) (%.2d) (%.2d)", 0, -5, 9))
-- The same precisions combined with the '+' flag.
print(string.format("(%+.0d) (%+.0d) (%+.0d)", 0, -5, 9))
print(string.format("(%+.1d) (%+.1d) (%+.1d)", 0, -5, 9))
print(string.format("(%+.2d) (%+.2d) (%+.2d)", 0, -5, 9))
-- '+' and ' ' flags, alone and together, with a width of 3.
print(string.format("(%+3d) (% 3d) (%+ 3d)", 55, 55, 55))
-- Left adjustment ('-') with widths 1 to 3.
print(string.format("(%-1d) (%-1d) (%-1d)", 1, 12, -12))
print(string.format("(%-2d) (%-2d) (%-2d)", 1, 12, -12))
print(string.format("(%-3d) (%-3d) (%-3d)", 1, 12, -12))
-- Width 8 in hex, decimal and octal, first space-padded and then zero-padded.
print(string.format("(%8x) (%8d) (%8o)", 255, 255, 255))
print(string.format("(%08x) (%08d) (%08o)", 255, 255, 255))
-- Plain %s substitution.
print(string.format("simple%ssimple", " simple "))
-- %s versus %q on a string containing quotes, '%', a zero byte, CR and LF.
specials = "\"specials\": %% \000 \r \n"
print(string.format("%s\n%q\n", specials, specials))
-- "%%" is a literal percent sign.
print(string.format("%%"))
-- A %s argument of 240 bytes (8 bytes repeated 30 times).
print(string.format("this is a %s long string", string.rep("really, ", 30)))
-- Protected call used by the tests below: returns the first result of the
-- call when it succeeds with a truthy value; otherwise returns 'false-'
-- followed by the type name of whatever pcall produced as its second value
-- (the error value on failure, or a nil/false result on success).
local function pc(...)
  local ok, result = pcall(...)
  if ok and result then
    return result
  end
  return 'false-' .. type(result)
end
local function strtests(name,func,...)
print(name, 'good', pc( func, ... ) )
print(name, 'empty', pc( func ) )