[Date Prev][Date Next][Thread Prev][Thread Next]
[Date Index]
[Thread Index]
- Subject: Re: Plea for the support of unicode escape sequences
- From: Edgar Toernig <froese@...>
- Date: Tue, 28 Jun 2011 21:47:52 +0200
Rebel Neurofog wrote:
> > I know that Lua's authors try to avoid bloat, but these additional
> > 176 bytes (that's what an implementation of the \u4x/\U8x variant on
> > x86-32 costs) are IMHO very well spent.
>
> Good call. I'm not sure whether I personally want it in the official
> release, but it could be handy for my project.
>
> Could you post a patch for Lua 5.1.4, please?
I originally implemented it for the 5.2-rc and had to modify it
slightly for 5.1. It got a little larger as 5.1 has no readhexaesc.
As a bonus you get hex-escapes, too.
Here you go:
diff --git a/src/llex.c b/src/llex.c
index 6dc3193..bc22303 100644
--- a/src/llex.c
+++ b/src/llex.c
@@ -273,6 +273,52 @@ static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
}
+static lu_int32 readhexaesc (LexState *ls, int ndigits) { /* read exactly ndigits hex digits via next() and return their combined value */
+ int buf[16], c, i; /* buf records 2 + ndigits characters for the error message; the call sites in this patch pass 2, 4 or 8 */
+ lu_int32 x = 0; /* value accumulated from the digits read so far */
+ buf[0] = '\\';
+ buf[1] = ls->current; /* character current on entry - presumably the escape letter (x, u or U); confirm against caller */
+ for (i = 0; i < ndigits; ++i) {
+ buf[2 + i] = c = next(ls);
+ if (!isxdigit(c))
+ {
+ /* prepare error message - show the valid part of the sequence */
+ int j;
+ luaZ_resetbuffer(ls->buff);
+ for (j = 0; j < i + 2; ++j) /* backslash, entry character and the i digits accepted so far; the offending character is left out */
+ save(ls, buf[j]);
+ luaX_lexerror(ls, "hexadecimal digit expected", TK_STRING); /* NOTE(review): execution would continue below if this returned - presumably it does not; confirm */
+ }
+ if (isdigit(c))
+ c -= '0';
+ else
+ c = tolower(c) - 'a' + 10; /* letter digit of either case -> 10..15 */
+ x = (x << 4) + c; /* shift in one hex digit (4 bits) */
+ }
+ return x; /* no next() after the final digit here; read_and_save_uniesc makes that call itself */
+}
+
+
+static void read_and_save_uniesc (LexState *ls, int ndigits) { /* read an ndigits-digit hex value and save() it as a UTF-8 style byte sequence */
+ lu_int32 x = readhexaesc(ls, ndigits); /* NOTE(review): x is used without a range check - surrogates and values above 0x10FFFF get encoded too; confirm this is intended */
+ next(ls); /* one more next() after the digits (readhexaesc does not call it after the last one) */
+ if (x > 0x7f) { /* needs a lead byte plus continuation bytes */
+ int buf[8], n = 0; /* continuation bytes, collected lowest-order first */
+ lu_int32 m = 0x3f; /* limit for the value that still fits into the lead byte; halves per continuation byte */
+ do {
+ buf[n++] = 0x80 + (x & 0x3f); /* 10xxxxxx: low six bits of x */
+ x >>= 6;
+ m >>= 1; /* every further continuation byte leaves the lead byte one payload bit less */
+ } while (x > m); /* repeat until the remaining bits of x fit under the limit */
+ save(ls, (~m << 1) + x); /* lead byte: marker bits above the payload plus the rest of x; presumably save() keeps only the low byte - confirm */
+ while (n)
+ save(ls, buf[--n]); /* emit continuation bytes in reverse collection order, i.e. highest-order first */
+ }
+ else
+ save(ls, x); /* values up to 0x7f are saved as a single byte */
+}
+
+
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
save_and_next(ls);
while (ls->current != del) {
@@ -295,6 +341,9 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) {
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
+ case 'x': c = readhexaesc(ls, 2); break;
+ case 'u': read_and_save_uniesc(ls, 4); continue;
+ case 'U': read_and_save_uniesc(ls, 8); continue;
case '\n': /* go through */
case '\r': save(ls, '\n'); inclinenumber(ls); continue;
case EOZ: continue; /* will raise an error next loop */