/* Version string: the literal "Lrexlib " followed by the VERSION macro. */
31 #define REX_VERSION "Lrexlib " VERSION
/* Forward declarations of static helpers that the code below calls.
 * Their definitions are not part of this section. */
34 static void gmatch_pushsubject (lua_State *L,
TArgExec *argE);
35 static int findmatch_exec (TUserdata *ud,
TArgExec *argE);
36 static int split_exec (TUserdata *ud,
TArgExec *argE,
int offset);
37 static int gsub_exec (TUserdata *ud,
TArgExec *argE,
int offset);
38 static int gmatch_exec (TUserdata *ud,
TArgExec *argE);
39 static int compile_regex (lua_State *L,
const TArgComp *argC, TUserdata **pud);
40 static int generate_error (lua_State *L,
const TUserdata *ud,
int errcode);
/* First upvalue of the running C function; test_ud() compares a value's
 * metatable against it. */
42 #define ALG_ENVIRONINDEX lua_upvalueindex(1)
/* Size of one subject character; offsets below are scaled by it. */
45 # define ALG_CHARSIZE 1
/* Default replacement-string parser, used unless one was defined earlier. */
48 #ifndef BUFFERZ_PUTREPSTRING
49 # define BUFFERZ_PUTREPSTRING bufferZ_putrepstring
/* This definition expands to nothing. */
53 # define ALG_GETCARGS(a,b,c)
/* Expands to nothing unless a definition was supplied earlier. */
56 #ifndef DO_NAMED_SUBPATTERNS
57 #define DO_NAMED_SUBPATTERNS(a,b,c)
/* Selector values passed as `method` to the generic find helpers. */
61 #define METHOD_MATCH 1
63 #define METHOD_TFIND 3
/* Interprets the optional "limit" argument at stack index `pos`.
 *   - absent or nil : returns GSUB_UNLIMITED
 *   - a function    : returns GSUB_CONDITIONAL
 *   - a number      : read with lua_tointeger
 *   - anything else : raises a type error ("number or function")
 * NOTE(review): the statement that returns the numeric value is not visible
 * in this listing -- confirm how negative numbers are treated. */
66 static int OptLimit (lua_State *L,
int pos) {
67 if (lua_isnoneornil (L, pos))
68 return GSUB_UNLIMITED;
69 if (lua_isfunction (L, pos))
70 return GSUB_CONDITIONAL;
71 if (lua_isnumber (L, pos)) {
72 int a = lua_tointeger (L, pos);
75 return luaL_typerror (L, pos,
"number or function");
/* Reads the optional start position at `stackpos` (default 1) and converts
 * it to an offset into the subject.
 *   len : subject length, in the same units as the returned offset.
 * A negative position has the subject length in characters added to it,
 * i.e. it is taken relative to the end of the subject. The result is
 * multiplied by ALG_CHARSIZE before being returned.
 * NOTE(review): the branch taken for positive positions and any clamping
 * are not visible in this listing. */
79 static int get_startoffset(lua_State *L,
int stackpos,
size_t len) {
80 int startoffset = (int)luaL_optinteger(L, stackpos, 1);
83 else if(startoffset < 0) {
84 startoffset += len/ALG_CHARSIZE;
88 return startoffset*ALG_CHARSIZE;
/* Tests whether the value at stack index `pos` is one of this library's
 * regex userdata: it must have a metatable that is raw-equal to
 * ALG_ENVIRONINDEX and lua_touserdata must return a non-NULL pointer.
 * NOTE(review): the return statements and stack cleanup are not visible
 * in this listing. */
92 static TUserdata* test_ud (lua_State *L,
int pos)
95 if (lua_getmetatable(L, pos) &&
96 lua_rawequal(L, -1, ALG_ENVIRONINDEX) &&
97 (ud = (TUserdata *)lua_touserdata(L, pos)) != NULL) {
/* Like test_ud() for argument 1, but raises a type error naming
 * REX_TYPENAME when the argument is not a regex userdata. */
105 static TUserdata* check_ud (lua_State *L)
107 TUserdata *ud = test_ud(L, 1);
108 if (ud == NULL) luaL_typerror(L, 1, REX_TYPENAME);
/* Validates the subject argument at stack index `pos` and stores its text
 * pointer and length in argE->text / argE->textlen.
 * Accepted types are string, table and userdata; anything else raises a
 * type error. When lua_tolstring yields NULL (the value is not a string or
 * number), the subject must provide a "topointer" function that returns a
 * light userdata; that pointer becomes the text and luaL_len of the
 * subject becomes its length. */
113 static void check_subject (lua_State *L,
int pos,
TArgExec *argE)
116 argE->text = lua_tolstring (L, pos, &argE->textlen);
117 stype = lua_type (L, pos);
118 if (stype != LUA_TSTRING && stype != LUA_TTABLE && stype != LUA_TUSERDATA) {
119 luaL_typerror (L, pos,
"string, table or userdata");
120 }
else if (argE->text == NULL) {
/* Buffer-like subject: fetch its "topointer" field. */
122 lua_getfield (L, pos,
"topointer");
123 if (lua_type (L, -1) != LUA_TFUNCTION)
124 luaL_error (L,
"subject has no topointer method");
/* The subject itself is pushed after the function.
 * NOTE(review): the call itself is not visible in this listing. */
125 lua_pushvalue (L, pos);
127 type = lua_type (L, -1);
128 if (type != LUA_TLIGHTUSERDATA)
129 luaL_error (L,
"subject's topointer method returned %s (expected lightuserdata)",
130 lua_typename (L, type));
131 argE->text = (
const char*) lua_touserdata (L, -1);
133 argE->textlen = luaL_len (L, pos);
/* Validates the pattern argument at stack index `pos`.
 * A string (or number) is stored as argC->pattern / argC->patlen.
 * Otherwise the value must be a regex userdata, which is stored in
 * argC->ud; if it is not, a type error is raised. */
137 static void check_pattern (lua_State *L,
int pos,
TArgComp *argC)
139 if (lua_isstring (L, pos)) {
140 argC->pattern = lua_tolstring (L, pos, &argC->patlen);
143 else if ((argC->ud = test_ud (L, pos)) == NULL)
144 luaL_typerror(L, pos,
"string or " REX_TYPENAME);
/* Collects the arguments of the regex constructor into argC:
 *   1: pattern (string, required)
 *   2: compilation flags, read by ALG_GETCFLAGS
 *   3: extra compile arguments, read by ALG_GETCARGS */
147 static void checkarg_new (lua_State *L,
TArgComp *argC) {
148 argC->pattern = luaL_checklstring (L, 1, &argC->patlen);
149 argC->cflags = ALG_GETCFLAGS (L, 2);
150 ALG_GETCARGS (L, 3, argC);
/* NOTE(review): the function header is not visible in this listing; the
 * argument layout below is presumably that of checkarg_gsub(), which
 * algf_gsub() calls -- confirm.
 *   1: subject            2: pattern (string or regex userdata)
 *   3: replacement (string, table or function; else a type error)
 *   4: match limit, see OptLimit()
 *   5: compilation flags  6: execution flags (default ALG_EFLAGS_DFLT)
 *   7: extra compile arguments */
156 check_subject (L, 1, argE);
157 check_pattern (L, 2, argC);
159 argE->reptype = lua_type (L, 3);
160 if (argE->reptype != LUA_TSTRING && argE->reptype != LUA_TTABLE &&
161 argE->reptype != LUA_TFUNCTION) {
162 luaL_typerror (L, 3,
"string, table or function");
166 argE->maxmatch = OptLimit (L, 4);
167 argC->cflags = ALG_GETCFLAGS (L, 5);
168 argE->eflags = (int)luaL_optinteger (L, 6, ALG_EFLAGS_DFLT);
169 ALG_GETCARGS (L, 7, argC);
/* NOTE(review): the function header is not visible in this listing; the
 * argument layout below is presumably that of checkarg_count(), which
 * algf_count() calls -- confirm.
 *   1: subject            2: pattern (string or regex userdata)
 *   3: compilation flags  4: execution flags (default ALG_EFLAGS_DFLT)
 *   5: extra compile arguments */
175 check_subject (L, 1, argE);
176 check_pattern (L, 2, argC);
177 argC->cflags = ALG_GETCFLAGS (L, 3);
178 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
179 ALG_GETCARGS (L, 5, argC);
/* Collects the arguments of the library-level find/match functions:
 *   1: subject            2: pattern (string or regex userdata)
 *   3: start position, converted by get_startoffset()
 *   4: compilation flags  5: execution flags (default ALG_EFLAGS_DFLT)
 *   6: extra compile arguments */
185 static void checkarg_find_func (lua_State *L,
TArgComp *argC,
TArgExec *argE) {
186 check_subject (L, 1, argE);
187 check_pattern (L, 2, argC);
188 argE->startoffset = get_startoffset (L, 3, argE->textlen);
189 argC->cflags = ALG_GETCFLAGS (L, 4);
190 argE->eflags = (int)luaL_optinteger (L, 5, ALG_EFLAGS_DFLT);
191 ALG_GETCARGS (L, 6, argC);
/* Collects the arguments shared by the gmatch and split functions:
 *   1: subject            2: pattern (string or regex userdata)
 *   3: compilation flags  4: execution flags (default ALG_EFLAGS_DFLT)
 *   5: extra compile arguments */
197 static void checkarg_gmatch_split (lua_State *L,
TArgComp *argC,
TArgExec *argE) {
198 check_subject (L, 1, argE);
199 check_pattern (L, 2, argC);
200 argC->cflags = ALG_GETCFLAGS (L, 3);
201 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
202 ALG_GETCARGS (L, 5, argC);
/* Collects the arguments of the find-style methods on a regex object:
 *   2: subject
 *   3: start position, converted by get_startoffset()
 *   4: execution flags (default ALG_EFLAGS_DFLT)
 * NOTE(review): the statement that fills *ud from argument 1 is not
 * visible in this listing. */
210 static void checkarg_find_method (lua_State *L,
TArgExec *argE, TUserdata **ud) {
212 check_subject (L, 2, argE);
213 argE->startoffset = get_startoffset (L, 3, argE->textlen);
214 argE->eflags = (int)luaL_optinteger (L, 4, ALG_EFLAGS_DFLT);
/* Lua-callable constructor: parses the arguments with checkarg_new() and
 * returns whatever compile_regex() returns. */
218 static int algf_new (lua_State *L) {
220 checkarg_new (L, &argC);
221 return compile_regex (L, &argC, NULL);
/* Pushes every capture of the last match (1 .. ALG_NSUB(ud)) onto the Lua
 * stack, using ALG_PUSHSUB_OR_FALSE for each one.
 * If the stack cannot be grown by ALG_NSUB(ud) slots, the free list is
 * released and a Lua error is raised.
 * NOTE(review): the remaining parameter(s) and any NULL check on
 * `freelist` are not visible in this listing; callers pass NULL. */
224 static void push_substrings (lua_State *L, TUserdata *ud,
const char *text,
227 if (lua_checkstack (L, ALG_NSUB(ud)) == 0) {
229 freelist_free (freelist);
230 luaL_error (L,
"cannot add %d stack slots", ALG_NSUB(ud));
232 for (i = 1; i <= ALG_NSUB(ud); i++) {
233 ALG_PUSHSUB_OR_FALSE (L, ud, text, i);
/* Lua-callable global substitution.
 * Walks the subject from offset 0, matching with gsub_exec() and appending
 * either unmatched text or a replacement to BufOut. The replacement may be
 * a string (pre-parsed into BufRep), a table (indexed by the first capture
 * or the whole match) or a function (called with the captures or the whole
 * match). When the limit argument is a function (GSUB_CONDITIONAL), each
 * replacement is first built in BufTemp and that function decides what is
 * finally written. Buffers are tracked in `freelist`, which is released on
 * every visible exit path. The last visible statements push the result
 * string, the match count and the substitution count.
 * NOTE(review): several conditions, closing braces and the return
 * statement are not visible in this listing. */
237 static int algf_gsub (lua_State *L) {
241 int n_match = 0, n_subst = 0, st = 0, last_to = -1;
242 TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut;
245 checkarg_gsub (L, &argC, &argE);
/* Either reuse the regex userdata passed as argument 2, or compile the
 * pattern now (the selecting condition is not visible here). */
247 ud = (TUserdata*) argC.ud;
248 lua_pushvalue (L, 2);
250 else compile_regex (L, &argC, &ud);
251 freelist_init (&freelist);
/* A string replacement is parsed once, up front, into BufRep. */
253 if (argE.reptype == LUA_TSTRING) {
254 buffer_init (&BufRep, 256, L, &freelist);
255 BUFFERZ_PUTREPSTRING (&BufRep, argE.funcpos, ALG_NSUB(ud));
258 if (argE.maxmatch == GSUB_CONDITIONAL) {
259 buffer_init (&BufTemp, 1024, L, &freelist);
263 buffer_init (&BufOut, 1024, L, &freelist);
/* Loop while the limit is negative or not yet reached, and the scan
 * position has not passed the end of the subject. */
264 while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (
int)argE.textlen) {
267 res = gsub_exec (ud, &argE, st);
268 if (ALG_NOMATCH (res)) {
/* Neither "no match" nor a match: release buffers and report the error. */
271 else if (!ALG_ISMATCH (res)) {
272 freelist_free (&freelist);
273 return generate_error (L, ud, res);
/* Offsets of the whole match within the subject. */
275 from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
276 to = ALG_BASE(st) + ALG_SUBEND(ud,0);
278 if (st < (
int)argE.textlen) {
/* Copy a single character from the scan position to the output. */
279 buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
/* Copy the unmatched text that precedes the match. */
288 buffer_addlstring (&BufOut, argE.text + st, from - st);
/* String replacement: replay the parsed pieces from BufRep. */
294 if (argE.reptype == LUA_TSTRING) {
295 size_t iter = 0, num;
297 while (bufferZ_next (&BufRep, &iter, &num, &str)) {
299 buffer_addlstring (pBuf, str, num);
/* Capture reference: copy capture `num` from the subject when it is
 * 0 or a valid capture. */
300 else if (num == 0 || ALG_SUBVALID (ud,num))
301 buffer_addlstring (pBuf, argE.text + ALG_BASE(st) + ALG_SUBBEG(ud,num), ALG_SUBLEN(ud,num));
/* Table replacement: the key is the first capture when the pattern has
 * captures; the whole match is also pushed on a path whose condition is
 * not visible here. */
306 else if (argE.reptype == LUA_TTABLE) {
307 if (ALG_NSUB(ud) > 0)
308 ALG_PUSHSUB_OR_FALSE (L, ud, argE.text + ALG_BASE(st), 1);
310 lua_pushlstring (L, argE.text + from, to - from);
311 lua_gettable (L, argE.funcpos);
/* Function replacement: push the function, then the captures or the
 * whole match, and call it in protected mode expecting one result. */
314 else if (argE.reptype == LUA_TFUNCTION) {
316 lua_pushvalue (L, argE.funcpos);
317 if (ALG_NSUB(ud) > 0) {
318 push_substrings (L, ud, argE.text + ALG_BASE(st), &freelist);
322 lua_pushlstring (L, argE.text + from, to - from);
325 if (0 != lua_pcall (L, narg, 1, 0)) {
326 freelist_free (&freelist);
327 return lua_error (L);
/* Interpret the value produced by the table lookup or function call:
 * a string or number is used as the replacement, false or nil keeps the
 * matched text, and any other value is an error. */
331 if (argE.reptype == LUA_TTABLE || argE.reptype == LUA_TFUNCTION) {
332 if (lua_tostring (L, -1)) {
333 buffer_addvalue (pBuf, -1);
336 else if (!lua_toboolean (L, -1))
337 buffer_addlstring (pBuf, argE.text + from, to - from);
339 freelist_free (&freelist);
340 luaL_error (L,
"invalid replacement value (a %s)", luaL_typename (L, -1));
342 if (argE.maxmatch != GSUB_CONDITIONAL)
/* Conditional mode: call the limit function with the 1-based start and
 * end positions of the match (in characters) and the proposed replacement;
 * it returns two values. */
346 if (argE.maxmatch == GSUB_CONDITIONAL) {
348 lua_pushvalue (L, argE.funcpos2);
349 lua_pushinteger (L, from/ALG_CHARSIZE + 1);
350 lua_pushinteger (L, to/ALG_CHARSIZE);
351 if (argE.reptype == LUA_TSTRING)
352 buffer_pushresult (&BufTemp);
354 lua_pushvalue (L, -4);
357 if (0 != lua_pcall (L, 3, 2, 0)) {
358 freelist_free (&freelist);
/* First result: a string or number replaces the match, another true value
 * accepts the proposed replacement held in BufTemp, and on the remaining
 * path the matched text is kept as is. */
362 if (lua_isstring (L, -2)) {
363 buffer_addvalue (&BufOut, -2);
366 else if (lua_toboolean (L, -2))
367 buffer_addbuffer (&BufOut, &BufTemp);
369 buffer_addlstring (&BufOut, argE.text + from, to - from);
/* Second result: a number sets a new limit relative to the matches seen
 * so far; another true value removes the limit. */
373 if (lua_type (L, -1) == LUA_TNUMBER) {
374 int n = lua_tointeger (L, -1);
377 argE.maxmatch = n_match + n;
379 else if (lua_toboolean (L, -1))
380 argE.maxmatch = GSUB_UNLIMITED;
382 buffer_clear (&BufTemp);
385 if (argE.maxmatch != GSUB_CONDITIONAL)
389 n_subst += curr_subst;
393 else if (st < (
int)argE.textlen) {
395 buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
/* Append whatever remains after the scan position, then push the result
 * string, the match count and the substitution count. */
401 buffer_addlstring (&BufOut, argE.text + st, argE.textlen - st);
402 buffer_pushresult (&BufOut);
403 lua_pushinteger (L, n_match);
404 lua_pushinteger (L, n_subst);
405 freelist_free (&freelist);
/* Lua-callable match counter.
 * Scans the subject from offset 0 with gsub_exec(), the same matcher that
 * algf_gsub() uses, and finally pushes the number of matches found.
 * A result that is neither a match nor "no match" is reported through
 * generate_error().
 * NOTE(review): the statements that advance `st` and update n_match and
 * last_to are not visible in this listing. */
410 static int algf_count (lua_State *L) {
414 int n_match = 0, st = 0, last_to = -1;
416 checkarg_count (L, &argC, &argE);
/* Reuse the regex userdata passed as argument 2, or compile the pattern
 * (the selecting condition is not visible here). */
418 ud = (TUserdata*) argC.ud;
419 lua_pushvalue (L, 2);
421 else compile_regex (L, &argC, &ud);
423 while (st <= (
int)argE.textlen) {
425 res = gsub_exec (ud, &argE, st);
426 if (ALG_NOMATCH (res)) {
429 else if (!ALG_ISMATCH (res)) {
430 return generate_error (L, ud, res);
/* End offset of the whole match within the subject. */
432 to = ALG_BASE(st) + ALG_SUBEND(ud,0);
434 if (st < (
int)argE.textlen) {
444 int from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
453 else if (st < (
int)argE.textlen) {
460 lua_pushinteger (L, n_match);
/* Pushes the Lua results of a find/match operation, given the matcher's
 * result code `res`.
 *   match    : for METHOD_FIND the offsets of the whole match are pushed
 *              first; then the captures are pushed, or (for other methods)
 *              the whole match. Returns ALG_NSUB(ud) + 2 for METHOD_FIND
 *              and ALG_NSUB(ud) otherwise on the visible return path.
 *   no match : pushes nil and returns 1.
 *   other    : returns generate_error().
 * NOTE(review): the rest of the parameter list and the condition choosing
 * between captures and whole match are not visible in this listing. */
465 static int finish_generic_find (lua_State *L, TUserdata *ud,
TArgExec *argE,
468 if (ALG_ISMATCH (res)) {
469 if (method == METHOD_FIND)
470 ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE->startoffset), 0);
472 push_substrings (L, ud, argE->text, NULL);
473 else if (method != METHOD_FIND) {
474 ALG_PUSHSUB (L, ud, argE->text, 0);
477 return (method == METHOD_FIND) ? ALG_NSUB(ud) + 2 : ALG_NSUB(ud);
479 else if (ALG_NOMATCH (res))
480 return lua_pushnil (L), 1;
482 return generate_error (L, ud, res);
/* Shared implementation of the library-level find and match functions.
 * Parses the arguments, returns nil at once when the start offset lies
 * beyond the end of the subject, obtains a regex (the userdata passed as
 * argument 2, or a freshly compiled pattern), runs findmatch_exec() and
 * lets finish_generic_find() push the results.
 *   method : METHOD_FIND or METHOD_MATCH (see the wrappers below). */
486 static int generic_find_func (lua_State *L,
int method) {
492 checkarg_find_func (L, &argC, &argE);
493 if (argE.startoffset > (
int)argE.textlen)
494 return lua_pushnil (L), 1;
497 ud = (TUserdata*) argC.ud;
498 lua_pushvalue (L, 2);
500 else compile_regex (L, &argC, &ud);
501 res = findmatch_exec (ud, &argE);
502 return finish_generic_find (L, ud, &argE, method, res);
/* Library function "find": generic_find_func() with METHOD_FIND. */
506 static int algf_find (lua_State *L) {
507 return generic_find_func (L, METHOD_FIND);
/* Library function "match": generic_find_func() with METHOD_MATCH. */
511 static int algf_match (lua_State *L) {
512 return generic_find_func (L, METHOD_MATCH);
/* Iterator closure created by algf_gmatch().
 * Upvalues: 1 = regex userdata, 2 = subject, 3 = execution flags,
 *           4 = offset at which to start the next search,
 *           5 = end offset of the previous match.
 * On a match, upvalues 4 and 5 are updated and the captures (or the whole
 * match) are pushed. A result that is neither a match nor "no match" is
 * reported through generate_error().
 * NOTE(review): the return statements and the computation of `incr` are
 * not visible in this listing. */
516 static int gmatch_iter (lua_State *L) {
519 TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
520 argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
521 argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
522 argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
523 last_end = lua_tointeger (L, lua_upvalueindex (5));
526 if (argE.startoffset > (
int)argE.textlen)
528 res = gmatch_exec (ud, &argE);
529 if (ALG_ISMATCH (res)) {
/* Empty match ending exactly where the previous match ended: move the
 * start offset forward by one character. */
531 if (!ALG_SUBLEN(ud,0)) {
532 if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
533 argE.startoffset += ALG_CHARSIZE;
/* Remember where this match ended and where the next search starts. */
538 last_end = ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0);
539 lua_pushinteger(L, last_end + incr);
540 lua_replace (L, lua_upvalueindex (4));
541 lua_pushinteger(L, last_end);
542 lua_replace (L, lua_upvalueindex (5));
545 push_substrings (L, ud, argE.text, NULL);
549 ALG_PUSHSUB (L, ud, argE.text, 0);
553 else if (ALG_NOMATCH (res))
556 return generate_error (L, ud, res);
/* Iterator closure created by algf_split().
 * Upvalues: 1 = regex userdata, 2 = subject, 3 = execution flags,
 *           4 = offset where the current section starts,
 *           5 = increment added to that offset before searching,
 *           6 = end offset of the previous separator match.
 * On a match it pushes the text between the section start and the
 * separator, followed by the separator's captures (or the whole
 * separator). The trailing visible statements set upvalue 5 to -1 and push
 * the rest of the subject.
 * NOTE(review): several return statements and conditions are not visible
 * in this listing. */
561 static int split_iter (lua_State *L) {
562 int incr, last_end, newoffset, res;
564 TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
565 argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
566 argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
567 argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
568 incr = lua_tointeger (L, lua_upvalueindex (5));
569 last_end = lua_tointeger (L, lua_upvalueindex (6));
575 if ((newoffset = argE.startoffset + incr) > (
int)argE.textlen)
577 res = split_exec (ud, &argE, newoffset);
578 if (ALG_ISMATCH (res)) {
/* Empty separator ending where the previous one ended: widen the
 * increment by one character. */
579 if (!ALG_SUBLEN(ud,0)) {
580 if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
581 incr += ALG_CHARSIZE;
/* The end of this separator becomes both the next section start
 * (upvalue 4) and the remembered last end (upvalue 6). */
585 lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0));
586 lua_pushvalue (L, -1);
587 lua_replace (L, lua_upvalueindex (4));
588 lua_replace (L, lua_upvalueindex (6));
/* Next increment: 0 after a non-empty separator, one character after an
 * empty one. */
589 lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE);
590 lua_replace (L, lua_upvalueindex (5));
/* The section text: from the section start up to the separator start. */
592 lua_pushlstring (L, argE.text + argE.startoffset,
593 ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
596 push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
597 return 1 + ALG_NSUB(ud);
600 ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
604 else if (ALG_NOMATCH (res))
607 return generate_error (L, ud, res);
/* Store -1 as the increment and push the remainder of the subject. */
609 lua_pushinteger (L, -1);
610 lua_replace (L, lua_upvalueindex (5));
611 lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset);
/* Library function "gmatch": builds the gmatch_iter closure.
 * The five upvalues are pushed in the order gmatch_iter reads them: the
 * regex (argument 2 when it is already a regex userdata, otherwise the
 * value left by compile_regex -- presumably the new userdata, confirm),
 * the subject, the execution flags, the start offset 0 and the previous
 * match end -1. */
616 static int algf_gmatch (lua_State *L)
620 checkarg_gmatch_split (L, &argC, &argE);
622 lua_pushvalue (L, 2);
624 compile_regex (L, &argC, NULL);
625 gmatch_pushsubject (L, &argE);
626 lua_pushinteger (L, argE.eflags);
627 lua_pushinteger (L, 0);
628 lua_pushinteger (L, -1);
629 lua_pushcclosure (L, gmatch_iter, 5);
/* Library function "split": builds the split_iter closure.
 * The six upvalues are pushed in the order split_iter reads them: the
 * regex (argument 2 when it is already a regex userdata, otherwise the
 * value left by compile_regex -- presumably the new userdata, confirm),
 * the subject, the execution flags, the section start 0, the increment 0
 * and the previous separator end -1. */
633 static int algf_split (lua_State *L)
637 checkarg_gmatch_split (L, &argC, &argE);
639 lua_pushvalue (L, 2);
641 compile_regex (L, &argC, NULL);
642 gmatch_pushsubject (L, &argE);
643 lua_pushinteger (L, argE.eflags);
644 lua_pushinteger (L, 0);
645 lua_pushinteger (L, 0);
646 lua_pushinteger (L, -1);
647 lua_pushcclosure (L, split_iter, 6);
/* Fills a table with the captures of the last match: entry i receives
 * capture i, stored through ALG_PUSHSUB_OR_FALSE.
 * NOTE(review): the statement that creates the table is not visible in
 * this listing; the loop stores into the value just below the pushed
 * capture. */
652 static void push_substring_table (lua_State *L, TUserdata *ud,
const char *text) {
655 for (i = 1; i <= ALG_NSUB(ud); i++) {
656 ALG_PUSHSUB_OR_FALSE (L, ud, text, i);
657 lua_rawseti (L, -2, i);
/* Fills a table with the offsets of the captures of the last match.
 * Each capture i occupies two consecutive entries: for a valid capture its
 * start and end (pushed by ALG_PUSHSTART / ALG_PUSHEND relative to
 * `startoffset`), otherwise false and false.
 * NOTE(review): the statement that creates the table is not visible in
 * this listing. */
662 static void push_offset_table (lua_State *L, TUserdata *ud,
int startoffset) {
665 for (i=1, j=1; i <= ALG_NSUB(ud); i++) {
666 if (ALG_SUBVALID (ud,i)) {
667 ALG_PUSHSTART (L, ud, startoffset, i);
668 lua_rawseti (L, -2, j++);
669 ALG_PUSHEND (L, ud, startoffset, i);
670 lua_rawseti (L, -2, j++);
/* Capture did not participate in the match: store a false/false pair. */
673 lua_pushboolean (L, 0);
674 lua_rawseti (L, -2, j++);
675 lua_pushboolean (L, 0);
676 lua_rawseti (L, -2, j++);
/* Shared implementation of the find-style methods of a regex object.
 * Parses the arguments, returns nil when the start offset lies beyond the
 * end of the subject, and runs findmatch_exec(). On a match it pushes,
 * depending on `method`, either the match offsets plus a table of capture
 * offsets, or the match offsets plus a table of capture strings (each
 * followed by DO_NAMED_SUBPATTERNS), or defers to finish_generic_find().
 * No match yields nil; any other result goes to generate_error().
 * NOTE(review): the conditions that select between these branches are not
 * visible in this listing -- presumably the offset table belongs to
 * METHOD_EXEC and the string table to METHOD_TFIND; confirm. */
682 static int generic_find_method (lua_State *L,
int method) {
687 checkarg_find_method (L, &argE, &ud);
688 if (argE.startoffset > (
int)argE.textlen)
689 return lua_pushnil(L), 1;
691 res = findmatch_exec (ud, &argE);
692 if (ALG_ISMATCH (res)) {
695 ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE.startoffset), 0);
696 push_offset_table (L, ud, ALG_BASE(argE.startoffset));
697 DO_NAMED_SUBPATTERNS (L, ud, argE.text);
700 ALG_PUSHOFFSETS (L, ud, ALG_BASE(argE.startoffset), 0);
701 push_substring_table (L, ud, argE.text);
702 DO_NAMED_SUBPATTERNS (L, ud, argE.text);
706 return finish_generic_find (L, ud, &argE, method, res);
710 else if (ALG_NOMATCH (res))
711 return lua_pushnil (L), 1;
713 return generate_error(L, ud, res);
/* Method "find": generic_find_method() with METHOD_FIND. */
717 static int algm_find (lua_State *L) {
718 return generic_find_method (L, METHOD_FIND);
/* Method "match": generic_find_method() with METHOD_MATCH. */
720 static int algm_match (lua_State *L) {
721 return generic_find_method (L, METHOD_MATCH);
/* Method "tfind": generic_find_method() with METHOD_TFIND. */
723 static int algm_tfind (lua_State *L) {
724 return generic_find_method (L, METHOD_TFIND);
/* Method "exec": generic_find_method() with METHOD_EXEC. */
726 static int algm_exec (lua_State *L) {
727 return generic_find_method (L, METHOD_EXEC);
/* Registers the library.
 *   r_methods   : methods installed in the regex metatable
 *   r_functions : functions installed in the library table
 *   name        : text inserted into the _VERSION string
 * The metatable is pushed as the single upvalue of every registered
 * method and function, which is what ALG_ENVIRONINDEX refers to.
 * NOTE(review): the end of this function is not visible in this listing. */
730 static void alg_register (lua_State *L,
const luaL_Reg *r_methods,
731 const luaL_Reg *r_functions,
const char *name) {
/* Create the metatable, register the methods with it as their upvalue,
 * and make it its own __index. */
733 luaL_newmetatable(L, REX_TYPENAME);
734 lua_pushvalue(L, -1);
735 luaL_setfuncs (L, r_methods, 1);
736 lua_pushvalue(L, -1);
737 lua_setfield(L, -2,
"__index");
/* Create the library table and register the functions, again with the
 * metatable as upvalue. */
740 lua_createtable(L, 0, 8);
741 lua_pushvalue(L, -2);
742 luaL_setfuncs (L, r_functions, 1);
/* Optionally expose the library table as a global named REX_LIBNAME. */
743 #ifdef REX_CREATEGLOBALVAR
744 lua_pushvalue(L, -1);
745 lua_setglobal(L, REX_LIBNAME);
747 lua_pushfstring (L, REX_VERSION
" (for %s)", name);
748 lua_setfield (L, -2,
"_VERSION");
/* Unless REX_NOEMBEDDEDTEST is defined, also export newmembuffer as
 * "_newmembuffer". */
749 #ifndef REX_NOEMBEDDEDTEST
750 lua_pushcfunction (L, newmembuffer);
751 lua_setfield (L, -2,
"_newmembuffer");