Mercurial > mm7
diff lib/lua/lua-5.2.2/llex.c @ 1866:41cc4dd3c122
Lua 5.2.2 added.
author | Nomad |
---|---|
date | Wed, 16 Oct 2013 13:34:26 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/lua/lua-5.2.2/llex.c Wed Oct 16 13:34:26 2013 +0200 @@ -0,0 +1,527 @@ +/* +** $Id: llex.c,v 2.63 2013/03/16 21:10:18 roberto Exp $ +** Lexical Analyzer +** See Copyright Notice in lua.h +*/ + + +#include <locale.h> +#include <string.h> + +#define llex_c +#define LUA_CORE + +#include "lua.h" + +#include "lctype.h" +#include "ldo.h" +#include "llex.h" +#include "lobject.h" +#include "lparser.h" +#include "lstate.h" +#include "lstring.h" +#include "ltable.h" +#include "lzio.h" + + + +#define next(ls) (ls->current = zgetc(ls->z)) + + + +#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') + + +/* ORDER RESERVED */ +static const char *const luaX_tokens [] = { + "and", "break", "do", "else", "elseif", + "end", "false", "for", "function", "goto", "if", + "in", "local", "nil", "not", "or", "repeat", + "return", "then", "true", "until", "while", + "..", "...", "==", ">=", "<=", "~=", "::", "<eof>", + "<number>", "<name>", "<string>" +}; + + +#define save_and_next(ls) (save(ls, ls->current), next(ls)) + + +static l_noret lexerror (LexState *ls, const char *msg, int token); + + +static void save (LexState *ls, int c) { + Mbuffer *b = ls->buff; + if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { + size_t newsize; + if (luaZ_sizebuffer(b) >= MAX_SIZET/2) + lexerror(ls, "lexical element too long", 0); + newsize = luaZ_sizebuffer(b) * 2; + luaZ_resizebuffer(ls->L, b, newsize); + } + b->buffer[luaZ_bufflen(b)++] = cast(char, c); +} + + +void luaX_init (lua_State *L) { + int i; + for (i=0; i<NUM_RESERVED; i++) { + TString *ts = luaS_new(L, luaX_tokens[i]); + luaS_fix(ts); /* reserved words are never collected */ + ts->tsv.extra = cast_byte(i+1); /* reserved word */ + } +} + + +const char *luaX_token2str (LexState *ls, int token) { + if (token < FIRST_RESERVED) { /* single-byte symbols? */ + lua_assert(token == cast(unsigned char, token)); + return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) : + luaO_pushfstring(ls->L, "char(%d)", token); + } + else { + const char *s = luaX_tokens[token - FIRST_RESERVED]; + if (token < TK_EOS) /* fixed format (symbols and reserved words)? */ + return luaO_pushfstring(ls->L, LUA_QS, s); + else /* names, strings, and numerals */ + return s; + } +} + + +static const char *txtToken (LexState *ls, int token) { + switch (token) { + case TK_NAME: + case TK_STRING: + case TK_NUMBER: + save(ls, '\0'); + return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff)); + default: + return luaX_token2str(ls, token); + } +} + + +static l_noret lexerror (LexState *ls, const char *msg, int token) { + char buff[LUA_IDSIZE]; + luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE); + msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); + if (token) + luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token)); + luaD_throw(ls->L, LUA_ERRSYNTAX); +} + + +l_noret luaX_syntaxerror (LexState *ls, const char *msg) { + lexerror(ls, msg, ls->t.token); +} + + +/* +** creates a new string and anchors it in function's table so that +** it will not be collected until the end of the function's compilation +** (by that time it should be anchored in function's prototype) +*/ +TString *luaX_newstring (LexState *ls, const char *str, size_t l) { + lua_State *L = ls->L; + TValue *o; /* entry for `str' */ + TString *ts = luaS_newlstr(L, str, l); /* create new string */ + setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */ + o = luaH_set(L, ls->fs->h, L->top - 1); + if (ttisnil(o)) { /* not in use yet? (see 'addK') */ + /* boolean value does not need GC barrier; + table has no metatable, so it does not need to invalidate cache */ + setbvalue(o, 1); /* t[string] = true */ + luaC_checkGC(L); + } + L->top--; /* remove string from stack */ + return ts; +} + + +/* +** increment line number and skips newline sequence (any of +** \n, \r, \n\r, or \r\n) +*/ +static void inclinenumber (LexState *ls) { + int old = ls->current; + lua_assert(currIsNewline(ls)); + next(ls); /* skip `\n' or `\r' */ + if (currIsNewline(ls) && ls->current != old) + next(ls); /* skip `\n\r' or `\r\n' */ + if (++ls->linenumber >= MAX_INT) + luaX_syntaxerror(ls, "chunk has too many lines"); +} + + +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source, + int firstchar) { + ls->decpoint = '.'; + ls->L = L; + ls->current = firstchar; + ls->lookahead.token = TK_EOS; /* no look-ahead token */ + ls->z = z; + ls->fs = NULL; + ls->linenumber = 1; + ls->lastline = 1; + ls->source = source; + ls->envn = luaS_new(L, LUA_ENV); /* create env name */ + luaS_fix(ls->envn); /* never collect this name */ + luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ +} + + + +/* +** ======================================================= +** LEXICAL ANALYZER +** ======================================================= +*/ + + + +static int check_next (LexState *ls, const char *set) { + if (ls->current == '\0' || !strchr(set, ls->current)) + return 0; + save_and_next(ls); + return 1; +} + + +/* +** change all characters 'from' in buffer to 'to' +*/ +static void buffreplace (LexState *ls, char from, char to) { + size_t n = luaZ_bufflen(ls->buff); + char *p = luaZ_buffer(ls->buff); + while (n--) + if (p[n] == from) p[n] = to; +} + + +#if !defined(getlocaledecpoint) +#define getlocaledecpoint() (localeconv()->decimal_point[0]) +#endif + + +#define buff2d(b,e) luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e) + +/* +** in case of format error, try to change decimal point separator to +** the one defined in the current locale and check again +*/ +static void trydecpoint (LexState *ls, SemInfo *seminfo) { + char old = ls->decpoint; + ls->decpoint = getlocaledecpoint(); + buffreplace(ls, old, ls->decpoint); /* try new decimal separator */ + if (!buff2d(ls->buff, &seminfo->r)) { + /* format error with correct decimal point: no more options */ + buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ + lexerror(ls, "malformed number", TK_NUMBER); + } +} + + +/* LUA_NUMBER */ +/* +** this function is quite liberal in what it accepts, as 'luaO_str2d' +** will reject ill-formed numerals. +*/ +static void read_numeral (LexState *ls, SemInfo *seminfo) { + const char *expo = "Ee"; + int first = ls->current; + lua_assert(lisdigit(ls->current)); + save_and_next(ls); + if (first == '0' && check_next(ls, "Xx")) /* hexadecimal? */ + expo = "Pp"; + for (;;) { + if (check_next(ls, expo)) /* exponent part? */ + check_next(ls, "+-"); /* optional exponent sign */ + if (lisxdigit(ls->current) || ls->current == '.') + save_and_next(ls); + else break; + } + save(ls, '\0'); + buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ + if (!buff2d(ls->buff, &seminfo->r)) /* format error? */ + trydecpoint(ls, seminfo); /* try to update decimal point separator */ +} + + +/* +** skip a sequence '[=*[' or ']=*]' and return its number of '='s or +** -1 if sequence is malformed +*/ +static int skip_sep (LexState *ls) { + int count = 0; + int s = ls->current; + lua_assert(s == '[' || s == ']'); + save_and_next(ls); + while (ls->current == '=') { + save_and_next(ls); + count++; + } + return (ls->current == s) ? count : (-count) - 1; +} + + +static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { + save_and_next(ls); /* skip 2nd `[' */ + if (currIsNewline(ls)) /* string starts with a newline? */ + inclinenumber(ls); /* skip it */ + for (;;) { + switch (ls->current) { + case EOZ: + lexerror(ls, (seminfo) ? "unfinished long string" : + "unfinished long comment", TK_EOS); + break; /* to avoid warnings */ + case ']': { + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd `]' */ + goto endloop; + } + break; + } + case '\n': case '\r': { + save(ls, '\n'); + inclinenumber(ls); + if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ + break; + } + default: { + if (seminfo) save_and_next(ls); + else next(ls); + } + } + } endloop: + if (seminfo) + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), + luaZ_bufflen(ls->buff) - 2*(2 + sep)); +} + + +static void escerror (LexState *ls, int *c, int n, const char *msg) { + int i; + luaZ_resetbuffer(ls->buff); /* prepare error message */ + save(ls, '\\'); + for (i = 0; i < n && c[i] != EOZ; i++) + save(ls, c[i]); + lexerror(ls, msg, TK_STRING); +} + + +static int readhexaesc (LexState *ls) { + int c[3], i; /* keep input for error message */ + int r = 0; /* result accumulator */ + c[0] = 'x'; /* for error message */ + for (i = 1; i < 3; i++) { /* read two hexadecimal digits */ + c[i] = next(ls); + if (!lisxdigit(c[i])) + escerror(ls, c, i + 1, "hexadecimal digit expected"); + r = (r << 4) + luaO_hexavalue(c[i]); + } + return r; +} + + +static int readdecesc (LexState *ls) { + int c[3], i; + int r = 0; /* result accumulator */ + for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */ + c[i] = ls->current; + r = 10*r + c[i] - '0'; + next(ls); + } + if (r > UCHAR_MAX) + escerror(ls, c, i, "decimal escape too large"); + return r; +} + + +static void read_string (LexState *ls, int del, SemInfo *seminfo) { + save_and_next(ls); /* keep delimiter (for error messages) */ + while (ls->current != del) { + switch (ls->current) { + case EOZ: + lexerror(ls, "unfinished string", TK_EOS); + break; /* to avoid warnings */ + case '\n': + case '\r': + lexerror(ls, "unfinished string", TK_STRING); + break; /* to avoid warnings */ + case '\\': { /* escape sequences */ + int c; /* final character to be saved */ + next(ls); /* do not save the `\' */ + switch (ls->current) { + case 'a': c = '\a'; goto read_save; + case 'b': c = '\b'; goto read_save; + case 'f': c = '\f'; goto read_save; + case 'n': c = '\n'; goto read_save; + case 'r': c = '\r'; goto read_save; + case 't': c = '\t'; goto read_save; + case 'v': c = '\v'; goto read_save; + case 'x': c = readhexaesc(ls); goto read_save; + case '\n': case '\r': + inclinenumber(ls); c = '\n'; goto only_save; + case '\\': case '\"': case '\'': + c = ls->current; goto read_save; + case EOZ: goto no_save; /* will raise an error next loop */ + case 'z': { /* zap following span of spaces */ + next(ls); /* skip the 'z' */ + while (lisspace(ls->current)) { + if (currIsNewline(ls)) inclinenumber(ls); + else next(ls); + } + goto no_save; + } + default: { + if (!lisdigit(ls->current)) + escerror(ls, &ls->current, 1, "invalid escape sequence"); + /* digital escape \ddd */ + c = readdecesc(ls); + goto only_save; + } + } + read_save: next(ls); /* read next character */ + only_save: save(ls, c); /* save 'c' */ + no_save: break; + } + default: + save_and_next(ls); + } + } + save_and_next(ls); /* skip delimiter */ + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, + luaZ_bufflen(ls->buff) - 2); +} + + +static int llex (LexState *ls, SemInfo *seminfo) { + luaZ_resetbuffer(ls->buff); + for (;;) { + switch (ls->current) { + case '\n': case '\r': { /* line breaks */ + inclinenumber(ls); + break; + } + case ' ': case '\f': case '\t': case '\v': { /* spaces */ + next(ls); + break; + } + case '-': { /* '-' or '--' (comment) */ + next(ls); + if (ls->current != '-') return '-'; + /* else is a comment */ + next(ls); + if (ls->current == '[') { /* long comment? */ + int sep = skip_sep(ls); + luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ + if (sep >= 0) { + read_long_string(ls, NULL, sep); /* skip long comment */ + luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */ + break; + } + } + /* else short comment */ + while (!currIsNewline(ls) && ls->current != EOZ) + next(ls); /* skip until end of line (or end of file) */ + break; + } + case '[': { /* long string or simply '[' */ + int sep = skip_sep(ls); + if (sep >= 0) { + read_long_string(ls, seminfo, sep); + return TK_STRING; + } + else if (sep == -1) return '['; + else lexerror(ls, "invalid long string delimiter", TK_STRING); + } + case '=': { + next(ls); + if (ls->current != '=') return '='; + else { next(ls); return TK_EQ; } + } + case '<': { + next(ls); + if (ls->current != '=') return '<'; + else { next(ls); return TK_LE; } + } + case '>': { + next(ls); + if (ls->current != '=') return '>'; + else { next(ls); return TK_GE; } + } + case '~': { + next(ls); + if (ls->current != '=') return '~'; + else { next(ls); return TK_NE; } + } + case ':': { + next(ls); + if (ls->current != ':') return ':'; + else { next(ls); return TK_DBCOLON; } + } + case '"': case '\'': { /* short literal strings */ + read_string(ls, ls->current, seminfo); + return TK_STRING; + } + case '.': { /* '.', '..', '...', or number */ + save_and_next(ls); + if (check_next(ls, ".")) { + if (check_next(ls, ".")) + return TK_DOTS; /* '...' */ + else return TK_CONCAT; /* '..' */ + } + else if (!lisdigit(ls->current)) return '.'; + /* else go through */ + } + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + read_numeral(ls, seminfo); + return TK_NUMBER; + } + case EOZ: { + return TK_EOS; + } + default: { + if (lislalpha(ls->current)) { /* identifier or reserved word? */ + TString *ts; + do { + save_and_next(ls); + } while (lislalnum(ls->current)); + ts = luaX_newstring(ls, luaZ_buffer(ls->buff), + luaZ_bufflen(ls->buff)); + seminfo->ts = ts; + if (isreserved(ts)) /* reserved word? */ + return ts->tsv.extra - 1 + FIRST_RESERVED; + else { + return TK_NAME; + } + } + else { /* single-char tokens (+ - / ...) */ + int c = ls->current; + next(ls); + return c; + } + } + } + } +} + + +void luaX_next (LexState *ls) { + ls->lastline = ls->linenumber; + if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ + ls->t = ls->lookahead; /* use this one */ + ls->lookahead.token = TK_EOS; /* and discharge it */ + } + else + ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ +} + + +int luaX_lookahead (LexState *ls) { + lua_assert(ls->lookahead.token == TK_EOS); + ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); + return ls->lookahead.token; +} +