RT-Thread
/
rt-thread
peilaus alkaen https://github-proxy.rt-thread.io/RT-Thread/rt-thread.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
							/*
 * This file is part of Espruino, a JavaScript interpreter for Microcontrollers
 *
 * Copyright (C) 2013 Gordon Williams <gw@pur3.co.uk>
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * ----------------------------------------------------------------------------
 * Lexer (convert JsVar strings into a series of tokens)
 * ----------------------------------------------------------------------------
 */
#include "jslex.h"


void jslSeek(JsLex *lex, JslCharPos seekToChar) {
  jsvStringIteratorFree(&lex->it);
  jsvStringIteratorNew(&lex->it, lex->sourceVar, seekToChar);
}

void NO_INLINE jslGetNextCh(JsLex *lex) {
  lex->currCh = lex->nextCh;
  lex->nextCh = jsvStringIteratorGetChar(&lex->it);
  jsvStringIteratorNextInline(&lex->it);
}

static inline void jslTokenAppendChar(JsLex *lex, char ch) {
  /* Add character to buffer but check it isn't too big.
   * Also Leave ONE character at the end for null termination */
  if (lex->tokenl < JSLEX_MAX_TOKEN_LENGTH-1) {
    lex->token[lex->tokenl++] = ch;
  }
#ifdef DEBUG
  else {
    jsWarnAt("Token name is too long! skipping character", lex, lex->tokenStart);
  }
#endif
}

static bool jslIsToken(JsLex *lex, const char *token, int startOffset) {
  int i;
  for (i=startOffset;i<lex->tokenl;i++) {
    if (lex->token[i]!=token[i]) return false;
    // if token is smaller than lex->token, there will be a null char
    // which will be different from the token
  }
  return token[lex->tokenl] == 0; // only match if token ends now
}

void jslGetNextToken(JsLex *lex) {
  lex->tk = LEX_EOF;
  lex->tokenl = 0; // clear token string
  if (lex->tokenValue) {
    jsvUnLock(lex->tokenValue);
    lex->tokenValue = 0;
  }
  while (lex->currCh && isWhitespace(lex->currCh)) jslGetNextCh(lex);
  // newline comments
  if (lex->currCh=='/' && lex->nextCh=='/') {
      while (lex->currCh && lex->currCh!='\n') jslGetNextCh(lex);
      jslGetNextCh(lex);
      jslGetNextToken(lex);
      return;
  }
  // block comments
  if (lex->currCh=='/' && lex->nextCh=='*') {
      while (lex->currCh && !(lex->currCh=='*' && lex->nextCh=='/'))
        jslGetNextCh(lex);
      if (!lex->currCh) {
        lex->tk = LEX_UNFINISHED_COMMENT;
        return; /* an unfinished multi-line comment. When in interactive console,
                   detect this and make sure we accept new lines */
      }
      jslGetNextCh(lex);
      jslGetNextCh(lex);
      jslGetNextToken(lex);
      return;
  }
  // record beginning of this token
  lex->tokenLastStart = lex->tokenStart;
  lex->tokenStart = (JslCharPos)(lex->it.index-2);
  // tokens
  if (isAlpha(lex->currCh) || lex->currCh=='$') { //  IDs
      while (isAlpha(lex->currCh) || isNumeric(lex->currCh) || lex->currCh=='$') {
          jslTokenAppendChar(lex, lex->currCh);
          jslGetNextCh(lex);
      }
      lex->tk = LEX_ID;
      // We do fancy stuff here to reduce number of compares (hopefully GCC creates a jump table)
      switch (lex->token[0]) {
      case 'b': if (jslIsToken(lex,"break", 1)) lex->tk = LEX_R_BREAK;
                break;
      case 'c': if (jslIsToken(lex,"case", 1)) lex->tk = LEX_R_CASE;
                else if (jslIsToken(lex,"continue", 1)) lex->tk = LEX_R_CONTINUE;
                break;
      case 'd': if (jslIsToken(lex,"default", 1)) lex->tk = LEX_R_DEFAULT;
                else if (jslIsToken(lex,"do", 1)) lex->tk = LEX_R_DO;
                break;
      case 'e': if (jslIsToken(lex,"else", 1)) lex->tk = LEX_R_ELSE;
                break;
      case 'f': if (jslIsToken(lex,"false", 1)) lex->tk = LEX_R_FALSE;
                else if (jslIsToken(lex,"for", 1)) lex->tk = LEX_R_FOR;
                else if (jslIsToken(lex,"function", 1)) lex->tk = LEX_R_FUNCTION;
                break;
      case 'i': if (jslIsToken(lex,"if", 1)) lex->tk = LEX_R_IF;
                else if (jslIsToken(lex,"in", 1)) lex->tk = LEX_R_IN;
                else if (jslIsToken(lex,"instanceof", 1)) lex->tk = LEX_R_INSTANCEOF;
                break;
      case 'n': if (jslIsToken(lex,"new", 1)) lex->tk = LEX_R_NEW;
                else if (jslIsToken(lex,"null", 1)) lex->tk = LEX_R_NULL;
                break;
      case 'r': if (jslIsToken(lex,"return", 1)) lex->tk = LEX_R_RETURN;
                break;
      case 's': if (jslIsToken(lex,"switch", 1)) lex->tk = LEX_R_SWITCH;
                break;
      case 't': if (jslIsToken(lex,"this", 1)) lex->tk = LEX_R_THIS;
                else if (jslIsToken(lex,"true", 1)) lex->tk = LEX_R_TRUE;
                else if (jslIsToken(lex,"typeof", 1)) lex->tk = LEX_R_TYPEOF;
                break;
      case 'u': if (jslIsToken(lex,"undefined", 1)) lex->tk = LEX_R_UNDEFINED;
                break;
      case 'w': if (jslIsToken(lex,"while", 1)) lex->tk = LEX_R_WHILE;
                break;
      case 'v': if (jslIsToken(lex,"var", 1)) lex->tk = LEX_R_VAR;
                else if (jslIsToken(lex,"void", 1)) lex->tk = LEX_R_VOID;
                break;
      default: break;
      }
  } else if (isNumeric(lex->currCh)) { // Numbers
      // TODO: check numbers aren't the wrong format
      bool canBeFloating = true;
      if (lex->currCh=='0') {
        jslTokenAppendChar(lex, lex->currCh);
        jslGetNextCh(lex);
      }

      if ((lex->currCh=='x' || lex->currCh=='X') ||
          (lex->currCh=='b' || lex->currCh=='B') ||
          (lex->currCh=='o' || lex->currCh=='O')) {
        canBeFloating = false;
        jslTokenAppendChar(lex, lex->currCh); jslGetNextCh(lex);
      }
      lex->tk = LEX_INT;
      while (isNumeric(lex->currCh) || (!canBeFloating && isHexadecimal(lex->currCh))) {
        jslTokenAppendChar(lex, lex->currCh);
        jslGetNextCh(lex);
      }
      if (canBeFloating && lex->currCh=='.') {
          lex->tk = LEX_FLOAT;
          jslTokenAppendChar(lex, '.');
          jslGetNextCh(lex);
          while (isNumeric(lex->currCh)) {
            jslTokenAppendChar(lex, lex->currCh);
            jslGetNextCh(lex);
          }
      }
      // do fancy e-style floating point
      if (canBeFloating && (lex->currCh=='e'||lex->currCh=='E')) {
        lex->tk = LEX_FLOAT;
        jslTokenAppendChar(lex, lex->currCh); jslGetNextCh(lex);
        if (lex->currCh=='-' || lex->currCh=='+') { jslTokenAppendChar(lex, lex->currCh); jslGetNextCh(lex); }
        while (isNumeric(lex->currCh)) {
          jslTokenAppendChar(lex, lex->currCh); jslGetNextCh(lex);
        }
      }
  } else if (lex->currCh=='"' || lex->currCh=='\'') {
      char delim = lex->currCh;
      lex->tokenValue = jsvNewFromEmptyString();
      // strings...
      jslGetNextCh(lex);
      while (lex->currCh && lex->currCh!=delim) {
          if (lex->currCh == '\\') {
              jslGetNextCh(lex);
              char ch = lex->currCh;
              switch (lex->currCh) {
              case 'n'  : ch = '\n'; jslGetNextCh(lex); break;
              case 'a'  : ch = '\a'; jslGetNextCh(lex); break;
              case 'r'  : ch = '\r'; jslGetNextCh(lex); break;
              case 't'  : ch = '\t'; jslGetNextCh(lex); break;
              case 'x' : { // hex digits
                            char buf[5] = "0x??";
                            jslGetNextCh(lex);
                            buf[2] = lex->currCh; jslGetNextCh(lex);
                            buf[3] = lex->currCh; jslGetNextCh(lex);
                            ch = (char)stringToInt(buf);
                         } break;
              default:
                       if (lex->currCh>='0' && lex->currCh<='7') {
                         // octal digits
                         char buf[5] = "0";
                         buf[1] = lex->currCh;
                         int n=2;
                         jslGetNextCh(lex);
                         if (lex->currCh>='0' && lex->currCh<='7') {
                           buf[n++] = lex->currCh; jslGetNextCh(lex);
                           if (lex->currCh>='0' && lex->currCh<='7') {
                             buf[n++] = lex->currCh; jslGetNextCh(lex);
                           }
                         }
                         buf[n]=0;
                         ch = (char)stringToInt(buf);
                       } else {
                         // for anything else, just push the character through
                         jslGetNextCh(lex);
                       }
                       break;
              }
              if (lex->tokenValue) {
                jslTokenAppendChar(lex, ch);
                jsvAppendCharacter(lex->tokenValue, ch);
              }
          } else {
            if (lex->tokenValue) {
              jslTokenAppendChar(lex, lex->currCh);
              jsvAppendCharacter(lex->tokenValue, lex->currCh);
            }
            jslGetNextCh(lex);
          }
      }
      jslGetNextCh(lex);
      lex->tk = LEX_STR;
  } else {
      // single chars
      lex->tk = lex->currCh;
      jslGetNextCh(lex);
      if (lex->tk=='=' && lex->currCh=='=') { // ==
          lex->tk = LEX_EQUAL;
          jslGetNextCh(lex);
          if (lex->currCh=='=') { // ===
            lex->tk = LEX_TYPEEQUAL;
            jslGetNextCh(lex);
          }
      } else if (lex->tk=='!' && lex->currCh=='=') { // !=
          lex->tk = LEX_NEQUAL;
          jslGetNextCh(lex);
          if (lex->currCh=='=') { // !==
            lex->tk = LEX_NTYPEEQUAL;
            jslGetNextCh(lex);
          }
      } else if (lex->tk=='<') {
        if (lex->currCh=='=') {
          lex->tk = LEX_LEQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='<') {
            lex->tk = LEX_LSHIFT;
            jslGetNextCh(lex);
            if (lex->currCh=='=') { // <<=
              lex->tk = LEX_LSHIFTEQUAL;
              jslGetNextCh(lex);
            }
        }
      } else if (lex->tk=='>') {
        if (lex->currCh=='=') {
          lex->tk = LEX_GEQUAL;
          jslGetNextCh(lex);
        } else if (lex->currCh=='>') {
          lex->tk = LEX_RSHIFT;
          jslGetNextCh(lex);
          if (lex->currCh=='=') { // >>=
            lex->tk = LEX_RSHIFTEQUAL;
            jslGetNextCh(lex);
          } else if (lex->currCh=='>') { // >>>
            jslGetNextCh(lex);
            if (lex->currCh=='=') { // >>>=
              lex->tk = LEX_RSHIFTUNSIGNEDEQUAL;
              jslGetNextCh(lex);
            } else {
              lex->tk = LEX_RSHIFTUNSIGNED;
            }
          }
        }
      }  else if (lex->tk=='+') {
          if (lex->currCh=='=') {
            lex->tk = LEX_PLUSEQUAL;
            jslGetNextCh(lex);
          } else if (lex->currCh=='+') {
            lex->tk = LEX_PLUSPLUS;
            jslGetNextCh(lex);
          }
      }  else if (lex->tk=='-') {
          if (lex->currCh=='=') {
            lex->tk = LEX_MINUSEQUAL;
            jslGetNextCh(lex);
          } else if (lex->currCh=='-') {
            lex->tk = LEX_MINUSMINUS;
            jslGetNextCh(lex);
          }
      } else if (lex->tk=='&') {
          if (lex->currCh=='=') {
            lex->tk = LEX_ANDEQUAL;
            jslGetNextCh(lex);
          } else if (lex->currCh=='&') {
            lex->tk = LEX_ANDAND;
            jslGetNextCh(lex);
          }
      } else if (lex->tk=='|') {
          if (lex->currCh=='=') {
            lex->tk = LEX_OREQUAL;
            jslGetNextCh(lex);
          } else if (lex->tk=='|' && lex->currCh=='|') {
            lex->tk = LEX_OROR;
            jslGetNextCh(lex);
          }
      } else if (lex->tk=='^' && lex->currCh=='=') {
          lex->tk = LEX_XOREQUAL;
          jslGetNextCh(lex);
      } else if (lex->tk=='*' && lex->currCh=='=') {
          lex->tk = LEX_MULEQUAL;
          jslGetNextCh(lex);
      } else if (lex->tk=='/' && lex->currCh=='=') {
          lex->tk = LEX_DIVEQUAL;
          jslGetNextCh(lex);
      } else if (lex->tk=='%' && lex->currCh=='=') {
          lex->tk = LEX_MODEQUAL;
          jslGetNextCh(lex);
      }
  }
  /* This isn't quite right yet */
  lex->tokenLastEnd = lex->tokenEnd;
  lex->tokenEnd = (JslCharPos)(lex->it.index-3)/*because of nextCh/currCh/etc */;
}

static inline void jslPreload(JsLex *lex) {
  // set up..
  jslGetNextCh(lex);
  jslGetNextCh(lex);
  jslGetNextToken(lex);
}

void jslInit(JsLex *lex, JsVar *var) {
  lex->sourceVar = jsvLockAgain(var);
  // reset stuff
  lex->tk = 0;
  lex->tokenStart = 0;
  lex->tokenEnd = 0;
  lex->tokenLastStart = 0;
  lex->tokenLastEnd = 0;
  lex->tokenl = 0;
  lex->tokenValue = 0;
  // set up iterator
  jsvStringIteratorNew(&lex->it, lex->sourceVar, 0);
  jslPreload(lex);
}

void jslKill(JsLex *lex) {
  lex->tk = LEX_EOF; // safety ;)
  jsvStringIteratorFree(&lex->it);
  if (lex->tokenValue) {
    jsvUnLock(lex->tokenValue);
    lex->tokenValue = 0;
  }
  jsvUnLock(lex->sourceVar);
}

void jslSeekTo(JsLex *lex, JslCharPos seekToChar) {
  jslSeek(lex, seekToChar);
  jslPreload(lex);
}

void jslReset(JsLex *lex) {
  jslSeekTo(lex, 0);
}

void jslTokenAsString(int token, char *str, size_t len) {
  // see JS_ERROR_TOKEN_BUF_SIZE
  if (token>32 && token<128) {
      assert(len>=4);
      str[0] = '\'';
      str[1] = (char)token;
      str[2] = '\'';
      str[3] = 0;
      return;
  }

  switch (token) {
      case LEX_EOF : strncpy(str, "EOF", len); return;
      case LEX_ID : strncpy(str, "ID", len); return;
      case LEX_INT : strncpy(str, "INT", len); return;
      case LEX_FLOAT : strncpy(str, "FLOAT", len); return;
      case LEX_STR : strncpy(str, "STRING", len); return;
  }
  if (token>=LEX_EQUAL && token<LEX_R_LIST_END) {
    const char tokenNames[] =
      /* LEX_EQUAL      :   */ "==\0"
      /* LEX_TYPEEQUAL  :   */ "===\0"
      /* LEX_NEQUAL     :   */ "!=\0"
      /* LEX_NTYPEEQUAL :   */ "!==\0"
      /* LEX_LEQUAL    :    */ "<=\0"
      /* LEX_LSHIFT     :   */ "<<\0"
      /* LEX_LSHIFTEQUAL :  */ "<<=\0"
      /* LEX_GEQUAL      :  */ ">=\0"
      /* LEX_RSHIFT      :  */ ">>\0"
      /* LEX_RSHIFTUNSIGNED */ ">>>\0"
      /* LEX_RSHIFTEQUAL :  */ ">>=\0"
      /* LEX_RSHIFTUNSIGNEDEQUAL */ ">>>=\0"
      /* LEX_PLUSEQUAL   :  */ "+=\0"
      /* LEX_MINUSEQUAL  :  */ "-=\0"
      /* LEX_PLUSPLUS :     */ "++\0"
      /* LEX_MINUSMINUS     */ "--\0"
      /* LEX_MULEQUAL :     */ "*=\0"
      /* LEX_DIVEQUAL :     */ "/=\0"
      /* LEX_MODEQUAL :     */ "%=\0"
      /* LEX_ANDEQUAL :     */ "&=\0"
      /* LEX_ANDAND :       */ "&&\0"
      /* LEX_OREQUAL :      */ "|=\0"
      /* LEX_OROR :         */ "||\0"
      /* LEX_XOREQUAL :     */ "^=\0"

      // reserved words
      /*LEX_R_IF :       */ "if\0"
      /*LEX_R_ELSE :     */ "else\0"
      /*LEX_R_DO :       */ "do\0"
      /*LEX_R_WHILE :    */ "while\0"
      /*LEX_R_FOR :      */ "for\0"
      /*LEX_R_BREAK :    */ "return\0"
      /*LEX_R_CONTINUE   */ "continue\0"
      /*LEX_R_FUNCTION   */ "function\0"
      /*LEX_R_RETURN     */ "return\0"
      /*LEX_R_VAR :      */ "var\0"
      /*LEX_R_THIS :     */ "this\0"
      /*LEX_R_TRUE :     */ "true\0"
      /*LEX_R_FALSE :    */ "false\0"
      /*LEX_R_NULL :     */ "null\0"
      /*LEX_R_UNDEFINED  */ "undefined\0"
      /*LEX_R_NEW :      */ "new\0"
      /*LEX_R_IN :       */ "in\0"
      /*LEX_R_INSTANCEOF */ "instanceof\0"
      /*LEX_R_SWITCH */     "switch\0"
      /*LEX_R_CASE */       "case\0"
      /*LEX_R_DEFAULT */    "default\0"
      /*LEX_R_TYPEOF :   */ "typeof\0"
      /*LEX_R_VOID :     */ "void\0"
        ;
    unsigned int p = 0;
    int n = token-LEX_EQUAL;
    while (n>0 && p<sizeof(tokenNames)) {
      while (tokenNames[p] && p<sizeof(tokenNames)) p++;
      p++; // skip the zero
      n--; // next token
    }
    assert(n==0);
    strncpy(str, &tokenNames[p], len);
    return;
  }

  assert(len>=10);
  strncpy(str, "?[",len);
  itoa(token, &str[2], 10);
  strncat(str, "]",len);
}

void jslGetTokenString(JsLex *lex, char *str, size_t len) {
  if (lex->tk == LEX_ID) {
    strncpy(str, "ID:", len);
    strncat(str, jslGetTokenValueAsString(lex), len);
  } else if (lex->tk == LEX_STR) {
    strncpy(str, "String:'", len);
    strncat(str, jslGetTokenValueAsString(lex), len);
    strncat(str, "'", len);
  } else
    jslTokenAsString(lex->tk, str, len);
}

char *jslGetTokenValueAsString(JsLex *lex) {
  assert(lex->tokenl < JSLEX_MAX_TOKEN_LENGTH);
  lex->token[lex->tokenl]  = 0; // add final null
  return lex->token;
}

JsVar *jslGetTokenValueAsVar(JsLex *lex) {
  if (lex->tokenValue) {
    return jsvLockAgain(lex->tokenValue);
  } else {
    assert(lex->tokenl < JSLEX_MAX_TOKEN_LENGTH);
    lex->token[lex->tokenl]  = 0; // add final null
    return jsvNewFromString(lex->token);
  }
}

/// Match, and return true on success, false on failure
bool jslMatch(JsLex *lex, int expected_tk) {
  if (lex->tk!=expected_tk) {
      char buf[JS_ERROR_BUF_SIZE];
      size_t bufpos = 0;
      strncpy(&buf[bufpos], "Got ", JS_ERROR_BUF_SIZE-bufpos);
      bufpos = strlen(buf);
      jslGetTokenString(lex, &buf[bufpos], JS_ERROR_BUF_SIZE-bufpos);
      bufpos = strlen(buf);
      strncpy(&buf[bufpos], " expected ", JS_ERROR_BUF_SIZE-bufpos);
      bufpos = strlen(buf);
      jslTokenAsString(expected_tk, &buf[bufpos], JS_ERROR_BUF_SIZE-bufpos);
      jsErrorAt(buf, lex, lex->tokenStart);
      // Sod it, skip this token anyway - stops us looping
      jslGetNextToken(lex);
      return false;
  }
  jslGetNextToken(lex);
  return true;
}