Fix empty matches in patterns.

```
-- Regression check for the new (5.3.3) semantics for empty matches:
-- both expected strings below contain exactly one "-" per whitespace
-- run, including the empty runs at the start, between adjacent
-- non-space characters, and at the end of the subject.
do
  local gsub, gmatch, substr = string.gsub, string.gmatch, string.sub

  -- gsub: only the first return value (the rewritten string) is compared;
  -- the substitution count is discarded.
  local replaced = gsub("a b cd", " *", "-")
  assert(replaced == "-a-b-c-d-")

  -- gmatch: the two position captures give the start and the end of each
  -- whitespace run, so the text between consecutive runs can be copied
  -- by hand, emitting one "-" per match.
  local subject = "a  \nbc\t\td"
  local rebuilt = ""
  local resume = 1   -- index where the not-yet-copied text begins
  for from, to in gmatch(subject, "()%s*()") do
    rebuilt = rebuilt .. substr(subject, resume, from - 1) .. "-"
    resume = to
  end
  assert(rebuilt == "-a-b-c-d-")
end
```
This commit is contained in:
Enyby
2019-11-09 23:20:06 +02:00
parent 6bc8fd6b1b
commit 8c42c4712b

View File

@@ -527,19 +527,20 @@ public class StringLib extends TwoArgFunction {
private final int srclen; private final int srclen;
private final MatchState ms; private final MatchState ms;
private int soffset; private int soffset;
private int lastmatch;
public GMatchAux(Varargs args, LuaString src, LuaString pat) { public GMatchAux(Varargs args, LuaString src, LuaString pat) {
this.srclen = src.length(); this.srclen = src.length();
this.ms = new MatchState(args, src, pat); this.ms = new MatchState(args, src, pat);
this.soffset = 0; this.soffset = 0;
this.lastmatch = -1;
} }
public Varargs invoke(Varargs args) { public Varargs invoke(Varargs args) {
for ( ; soffset<=srclen; soffset++ ) { for ( ; soffset<=srclen; soffset++ ) {
ms.reset(); ms.reset();
int res = ms.match(soffset, 0); int res = ms.match(soffset, 0);
if ( res >=0 ) { if ( res >=0 && res != lastmatch ) {
int soff = soffset; int soff = soffset;
soffset = res; lastmatch = soffset = res;
if (soff == res) soffset++; /* empty match? go at least one position */
return ms.push_captures( true, soff, res ); return ms.push_captures( true, soff, res );
} }
} }
@@ -598,6 +599,7 @@ public class StringLib extends TwoArgFunction {
LuaString src = args.checkstring( 1 ); LuaString src = args.checkstring( 1 );
final int srclen = src.length(); final int srclen = src.length();
LuaString p = args.checkstring( 2 ); LuaString p = args.checkstring( 2 );
int lastmatch = -1; /* end of last match */
LuaValue repl = args.arg( 3 ); LuaValue repl = args.arg( 3 );
int max_s = args.optint( 4, srclen + 1 ); int max_s = args.optint( 4, srclen + 1 );
final boolean anchor = p.length() > 0 && p.charAt( 0 ) == '^'; final boolean anchor = p.length() > 0 && p.charAt( 0 ) == '^';
@@ -610,18 +612,15 @@ public class StringLib extends TwoArgFunction {
while ( n < max_s ) { while ( n < max_s ) {
ms.reset(); ms.reset();
int res = ms.match( soffset, anchor ? 1 : 0 ); int res = ms.match( soffset, anchor ? 1 : 0 );
if ( res != -1 ) { if ( res != -1 && res != lastmatch ) { /* match? */
n++; n++;
ms.add_value( lbuf, soffset, res, repl ); ms.add_value( lbuf, soffset, res, repl ); /* add replacement to buffer */
soffset = lastmatch = res;
} }
if ( res != -1 && res > soffset ) else if ( soffset < srclen ) /* otherwise, skip one character */
soffset = res;
else if ( soffset < srclen )
lbuf.append( (byte) src.luaByte( soffset++ ) ); lbuf.append( (byte) src.luaByte( soffset++ ) );
else else break; /* end of subject */
break; if ( anchor ) break;
if ( anchor )
break;
} }
lbuf.append( src.substring( soffset, srclen ) ); lbuf.append( src.substring( soffset, srclen ) );
return varargsOf(lbuf.tostring(), valueOf(n)); return varargsOf(lbuf.tostring(), valueOf(n));