#include #include #include #include #include #include "mbsh.tab.h" #include "variable.hpp" #include "ptnode.hpp" #include "regmatch.hpp" #include "function.hpp" #include "lex.hpp" using namespace std; using namespace MBSH; static regmatch_t g_regmatch[] = { { MLEX_HEXNUMBER, "^0[x][\\da-f]+", PCRE_CASELESS }, { MLEX_OCTNUMBER, "^0[\\d]+", 0 }, { MLEX_NUMBER, "^[\\d]+", 0 }, { MLEX_FLOAT, "^[\\d]*\\.[\\d]+", 0 }, { MLEX_DSTRING, "^\"[^\"]*[^\\\\]?\"", 0 }, { MLEX_VARIABLE, "^\\$[a-z_][\\w]*", PCRE_CASELESS }, { MLEX_SYSVARIABLE, "^\\$[\\d]+", 0 }, { MLEX_INCL, "^\\+\\+", 0 }, { MLEX_DECL, "^\\-\\-", 0 }, { MLEX_EQ, "^==", 0 }, { MLEX_NE, "^!=", 0 }, { MLEX_LEAB, "^\\<=", 0 }, { MLEX_REAB, "^\\>=", 0 }, { MLEX_IF, "^if", 0 }, { MLEX_ELSE, "^else", 0 }, { MLEX_RETURN, "^return", 0 }, { MLEX_SUB, "^sub", 0 }, { MLEX_WHILE, "^while", 0 }, { MLEX_GLOBAL, "^our", 0 }, { MLEX_IDENT, "^[a-z_][\\w]*", PCRE_CASELESS }, { 0, NULL, 0 } }; static char g_chrmatch_b[] = { ';', '.', '*', '/', '(', ')', '{', '}', ',', '[', ']', '&', '|', '^', '~', 0 }; static char g_chrmatch_a[] = { '=', '+', '-', '<', '>', 0 }; const char *MBSH::g_lexbuffer; long MBSH::g_paren_stack; unsigned long MBSH::g_line; static ptnode *make_node(int token_id, char text[]); int MBSH::lex(ptnode **ppnode) { static regmatch *regex = new regmatch(g_regmatch); static char text[1024]; int ovector[4], i, token_id; //if (regex->has_error()) { // return 0; //} while (*g_lexbuffer != '\0') { if (*g_lexbuffer == ' ' || *g_lexbuffer == '\t') { ++g_lexbuffer; } else if (*g_lexbuffer == '\n') { ++g_line; ++g_lexbuffer; } else { break; } } if (*g_lexbuffer == '\0') { return 0; } token_id = 0; for (i = 0; g_chrmatch_b[i] != 0; ++i) { if (*g_lexbuffer == g_chrmatch_b[i]) { text[0] = *g_lexbuffer; text[1] = '\0'; ++g_lexbuffer; token_id = g_chrmatch_b[i]; break; } } if (token_id == 0) { try { for (i = 0; g_regmatch[i].regex != NULL; ++i) { if (regex->match(g_regmatch[i].id, g_lexbuffer, ovector)) { memcpy(text, g_lexbuffer, ovector[1]); text[ovector[1]] = '\0'; g_lexbuffer = g_lexbuffer + ovector[1]; token_id = g_regmatch[i].id; break; } } } catch (runtime_error &e) { fprintf(stderr, "%s", e.what()); exit(0); } } if (token_id == 0) { for (i = 0; g_chrmatch_a[i] != 0; ++i) { if (*g_lexbuffer == g_chrmatch_a[i]) { text[0] = *g_lexbuffer; text[1] = '\0'; ++g_lexbuffer; token_id = g_chrmatch_a[i]; break; } } } if (token_id == 0) { return MLEX_ERROR; } *ppnode = make_node(token_id, text); // printf("%d: %s\n", token_id, text); return token_id; } static ptnode *make_node(int token_id, char text[]) { string *symbol = NULL; variable *var = NULL; ptnode *pnode; switch (token_id) { case MLEX_INCL: case MLEX_DECL: case MLEX_RETURN: case MLEX_IF: case MLEX_WHILE: case MLEX_SUB: case MLEX_EQ: case MLEX_NE: case MLEX_LEAB: case MLEX_REAB: case MLEX_OR: case MLEX_AND: case MLEX_GLOBAL: case '=': case '.': case '+': case '-': case '*': case '/': case '%': case '|': case '&': case '^': case '~': case '!': case '<': case '>': symbol = new string(text); pnode = ptnode::newnode(symbol); pnode->token_id = token_id + TOKEN_OPERATION; break; case MLEX_IDENT: symbol = new string(text); pnode = ptnode::newnode(symbol); pnode->token_id = token_id + TOKEN_FUNCTION; break; case MLEX_VARIABLE: case MLEX_SYSVARIABLE: symbol = new string(text); pnode = ptnode::newnode(symbol); pnode->token_id = token_id; break; case MLEX_NUMBER: case MLEX_HEXNUMBER: case MLEX_OCTNUMBER: case MLEX_FLOAT: var = new variable(); var->set_string(text); pnode = ptnode::newnode(var); token_id = MLEX_NUMBER; /* number */ pnode->token_id = token_id; break; case MLEX_DSTRING: var = new variable(); text[(strlen(text) -1)] = '\0'; var->set_string(&text[1]); pnode = ptnode::newnode(var); pnode->token_id = token_id; break; default: pnode = NULL; break; } if (pnode != NULL) { pnode->line = g_line; } return pnode; }