/*
 Program WinCaml: Graphical User Interface
 for interactive use of Caml-Light and Ocaml.
 Copyright (C) 2005-2015 Jean Mouric 35700 Rennes France
 email: jean.mouric@orange.fr
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

// File lex.zz.cpp

#include "platform.h"
#include "CamlIndenter.h"

// Offsets, relative to the start of the scan buffer, of the current token:
// tokenbegin is set by zzlex() before a token is read, tokenend by ret().
static size_t tokenbegin;
static size_t tokenend;

static int t;         // character most recently read by the scanner
static char* p;       // read cursor into the scan buffer
static char* str;     // heap copy of the scanned text (allocated in zz_scan_string, freed in ret)
static char buf[256]; // NUL-terminated identifier text collected by alpha() for keyword lookup

// Results published by ret(); all but zzleng are defined in another translation unit.
extern size_t currentPos;
extern size_t token_length;
size_t zzleng;
extern int leading;
extern int trailing;
extern int newline;

static int d;         // comment nesting depth, maintained by lpar()
static size_t sz;     // length of the scanned text, not counting the terminating NUL

// Keyword table searched by keyvalue() with a binary search, so the entries
// must stay in strcmp() order. keywordsMax is the index of the LAST entry,
// not the number of entries.
static const int keywordsMax = 34;
static const char* keywords[] ={ "and", "asr", "begin", "do", "done", "downto", "else", "end", "for", "fun", "function", "if", "in", "land", "let", "lnot", "lor", "lsl", "lsr",  "lxor", "match", "mod", "mutable", "not", "of", "or", "parser", "rec", "then", "to", "try", "type", "where", "while", "with"};

// Token code returned for the keyword at the same index in keywords[].
static const int keyvalues[] = {AND2, OTHER2, BEGIN2, DO2, DONE2, TO2, ELSE2, END2, FOR2, FUN2, FUNCTION2, IF2, IN2, OTHER2, LET2, OTHER2, OTHER2, OTHER2, OTHER2, OTHER2, MATCH2, OTHER2, MUTABLE2, OTHER2, OF2, OTHER2, PARSER2, REC2, THEN2, TO2, TRY2, TYPE2, WHERE2, WHILE2, WITH2};

// Characters accepted by isOpChar(), kept in ascending order for its binary
// search. opCharsMax is the index of the last entry.
static const int opCharsMax = 18;
static const char opChars[] = {'!', '$', '%', '&', '*', '+', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '^', '|', '~'};

// Characters that inittab() dispatches straight to opfirstchars().
// opFirstCharsMax is the index of the last entry.
static const int opFirstCharsMax = 6;
static const char opFirstChars[] = {'$', '%', '&', '+', '/', '@', '^'};

//------------------------------------------------------------------------------------------------------------------------------------

// is_digit() has external linkage and is not defined in this section.
bool is_digit(char c);
// Forward declarations of the file-local helpers defined below.
static inline int ret(int tok, int lead, int trail);
static inline void whitespace();
static inline int keyvalue(char* buf);
static inline bool isOpChar(char c);
static inline bool isUpperCase(int c);
static inline bool isLowerCase(int c);
static inline bool isAlpha(int c);
static inline bool isAlNum(int c);

// Finishes the token that ends at the cursor p and publishes its description.
//
// tok   - token code to return
// lead  - value stored in `leading`
// trail - value stored in `trailing`
//
// Returns tok after updating currentPos, tokenend, token_length and zzleng.
// When tok is 0 or the cursor has moved past the end of the text, the scan is
// over: currentPos is clamped to sz, leading/trailing are cleared, the scan
// buffer is freed and 0 is returned.
static inline int ret(int tok, int lead, int trail)
{
    const size_t pos = (size_t)(p - str);
    tokenend = pos;
    currentPos = pos;

    if (tok != 0 && pos <= sz) {
        zzleng = pos - tokenbegin;
        token_length = zzleng;
        leading = lead;
        trailing = trail;
        return tok;
    }

    // End of input: release the buffer allocated by zz_scan_string().
    currentPos = sz;
    leading = 0;
    trailing = 0;
    free(str);
    return 0;
}

// Advances the cursor over blanks, tabs and line breaks, incrementing
// `newline` once per line break. A "\r\n" pair counts as one line break,
// as does a lone '\r' or a lone '\n'.
static inline void whitespace()
{
    for (;;) {
        switch (*p) {
        case ' ':
        case '\t':
            p++;
            break;
        case '\n':
            p++;
            newline++;
            break;
        case '\r':
            newline++;
            p++;
            // Swallow the '\n' of a CR-LF pair so it is not counted twice.
            if (*p == '\n') {
                p++;
            }
            break;
        default:
            return;
        }
    }
}

// Looks s up in the sorted keywords[] table with a binary search.
// Returns the matching entry of keyvalues[], or 0 when s is not a keyword.
static inline int keyvalue(char* s)
{
    int lo = 0;
    int hi = keywordsMax;

    // Reject anything outside the range spanned by the table.
    if (strcmp(s, keywords[lo]) < 0 || strcmp(keywords[hi], s) < 0) {
        return 0;
    }

    // Narrow [lo, hi] until the two bounds are adjacent (or equal).
    for (int mid = (lo + hi) / 2; mid != lo; mid = (lo + hi) / 2) {
        const int cmp = strcmp(keywords[mid], s);
        if (cmp == 0) {
            return keyvalues[mid];
        }
        if (cmp < 0) {
            lo = mid;
        }
        else {
            hi = mid;
        }
    }

    if (strcmp(keywords[lo], s) == 0) {
        return keyvalues[lo];
    }
    if (strcmp(keywords[hi], s) == 0) {
        return keyvalues[hi];
    }
    return 0;
}

// Tells whether c is one of the characters listed in the sorted opChars[]
// table, using a binary search.
static inline bool isOpChar(char c)
{
    int lo = 0;
    int hi = opCharsMax;

    // Outside the range covered by the table: cannot be an operator character.
    if (c < opChars[lo] || c > opChars[hi]) {
        return false;
    }

    for (int mid = (lo + hi) / 2; mid != lo; mid = (lo + hi) / 2) {
        const char probe = opChars[mid];
        if (probe == c) {
            return true;
        }
        if (probe < c) {
            lo = mid;
        }
        else {
            hi = mid;
        }
    }

    if (opChars[lo] == c) {
        return true;
    }
    return opChars[hi] == c;
}

// Tells whether the character code c lies in one of the upper-case ranges
// 65..90, 192..214 or 216..222 (code 215 is deliberately left out).
static inline bool isUpperCase(int c)
{
    if (c >= 65 && c <= 90) {
        return true;
    }
    if (c >= 192 && c <= 214) {
        return true;
    }
    return c >= 216 && c <= 222;
}

// Tells whether the character code c is a lower-case letter or '_':
// 97..122, 223..246, 248..255, or the underscore.
// Code 247 is excluded, mirroring the way isUpperCase() skips 215.
// The last range previously started at 249, which wrongly rejected 248.
static inline bool isLowerCase(int c)
{
    return (96 < c && c < 123) || (222 < c && c < 247) || (247 < c && c < 256) || c == '_';
}

// Tells whether c is accepted by isUpperCase() or isLowerCase()
// (the latter also accepts '_').
static inline bool isAlpha(int c)
{
    if (isUpperCase(c)) {
        return true;
    }
    return isLowerCase(c);
}

// Tells whether c is accepted by isAlpha() or, failing that, by is_digit().
static inline bool isAlNum(int c)
{
    if (isAlpha(c)) {
        return true;
    }
    return is_digit(c);
}

//------------------------------------------------------------------------------------------------------------------------------------

// True until zz_scan_string() has called inittab() once.
static bool init = true;
// Dispatch table indexed by the first byte of a token. zzlex() calls the
// handler stored for that byte; bytes whose entry is still NULL are returned
// as OTHER2.
static int (*tab[256])() = {NULL};
static  void inittab();
// Token handlers. Each one is entered with t holding the dispatched character
// and p pointing just after it.
static  int lpar();
static  int rpar();
static  int comment();
static  int lbrace();
static  int rbrace();
static  int lbracket();
static  int rbracket();
static  int bigger();
static  int verticalbar();
static  int doublequote();
static  int backslash();
static  int semicolon();
static  int colon();
static  int minussign();
static  int underscore();
static  int sharp();
static  int comma();
static  int dotpar();
static  int charliteral();
static  int exclam();
static  int smaller();
static  int equalsign();
static  int hexadecimal();
static  int num();
static  int opfirstchars();
static  int alpha();

// Fills the dispatch table tab[] with the handler for each leading character.
// Entries that are not assigned here stay NULL.
static  void inittab()
{
    tab[(int)'('] = lpar;
    tab[(int)')'] = rpar;
    tab[(int)'*'] = comment;
    tab[(int)'{'] = lbrace;
    tab[(int)'}'] = rbrace;
    tab[(int)'['] = lbracket;
    tab[(int)']'] = rbracket;
    tab[(int)'>'] = bigger;
    tab[(int)'|'] = verticalbar;
    tab[(int)'"'] = doublequote;
    tab[(int)'\\'] = backslash;
    tab[(int)';'] = semicolon;
    tab[(int)':'] = colon;
    tab[(int)'-'] = minussign;
    tab[(int)'#'] = sharp;
    tab[(int)','] = comma;
    tab[(int)'.'] = dotpar;
    tab[(int)'\''] = tab[(int)'`'] = charliteral;
    tab[(int)'!'] = exclam;
    tab[(int)'<'] = smaller;
    tab[(int)'='] = equalsign;
    tab[(int)'0'] = hexadecimal;
    int i;
    for (i = (int)'1'; i <= (int)'9'; i++) {
        tab[i] = num;
    }
    for (i = 0; i <= opFirstCharsMax; i++) {
        tab[(int)opFirstChars[i]] = opfirstchars;
    }
    for (i = 65; i < 256; i++) {
        if (isAlpha(i)) {
            tab[i] = alpha;
        }
    }
    // isAlpha('_') is true, so the loop above also claims '_' for alpha().
    // Install the dedicated handler afterwards; assigning it before the loop
    // left underscore() unreachable.
    tab[(int)'_'] = underscore;
}

// Handler for '('.
//
// "(*" opens a comment; comments nest, and the depth is tracked in d. The
// whole comment is returned as COMMENT2. If the text ends before the comment
// is closed, everything up to the end of the text is returned as OTHER2.
// "(" followed by optional white space and ")" is returned as a single OTHER2
// token. Any other "(" is returned as LEFTPAR2.
int lpar()
{
    if (*p == '*') {
        d++;
        t = *(p + 1);
        p += 2;
        // Invariant at the top of the loop: t is the character at p[-1].
        while (d != 0) {
            if (t == '\0') {
                // Step back onto the terminator so the next zzlex() sees it.
                p--;
                return ret(OTHER2, 1, 0);
            }
            if (t == '(' && *p == '*') {
                d++;
                t = *(p + 1);
                p += 2;
            }
            else if (t == '*' && *p == ')') {
                d--;
                p++;
                // Still inside an outer comment: consume the next character so
                // the invariant holds. Peeking at it without advancing made
                // the end-of-text branch above step back onto this ')' and
                // emit it as a separate token.
                if (d != 0) {
                    t = *p++;
                }
            }
            else t = *p++;
        }
        return ret(COMMENT2, 1, 1);
    }
    // Look ahead for "( )": remember the cursor so it can be restored.
    char* r = p;
    while(*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
        p++;
    }
    if (*p == ')') {
        p++;
        return ret(OTHER2, 1, 1);
    }
    p = r;
    return ret(LEFTPAR2, 1, 0);
}

// Handler for ')': the single character is returned as RIGHTPAR2.
int rpar()
{
    const int token = RIGHTPAR2;
    return ret(token, 0, 1);
}

// Handler for '*': "*)" is returned as one OTHER2 token; any other '*' is
// scanned as an operator by opfirstchars().
int comment()
{
    if (*p != ')') {
        return opfirstchars();
    }
    p++;
    return ret(OTHER2, 1, 1);
}

// Handler for '{': the single character is returned as LEFTBRACE2.
int lbrace()
{
    const int token = LEFTBRACE2;
    return ret(token, 1, 0);
}

// Handler for '}': the single character is returned as RIGHTBRACE2.
int rbrace()
{
    const int token = RIGHTBRACE2;
    return ret(token, 0, 1);
}

// Handler for '[': "[<" is returned as LSTREAM2; "[|" and a plain "[" are
// both returned as LEFTPAR2.
int lbracket()
{
    switch (*p) {
    case '<':
        p++;
        return ret(LSTREAM2, 1, 0);
    case '|':
        p++;
        return ret(LEFTPAR2, 1, 0);
    default:
        return ret(LEFTPAR2, 1, 0);
    }
}

// Handler for ']': the single character is returned as RIGHTPAR2.
int rbracket()
{
    const int token = RIGHTPAR2;
    return ret(token, 0, 1);
}

// Handler for '>': ">]" is returned as RSTREAM2; any other '>' is scanned as
// an operator by opfirstchars().
int bigger()
{
    if (*p != ']') {
        return opfirstchars();
    }
    p++;
    return ret(RSTREAM2, 0, 1);
}

// Handler for '|': "|]" is returned as RIGHTPAR2, a '|' that is not followed
// by an operator character is returned as BAR2, and anything else is scanned
// as an operator by opfirstchars().
int verticalbar()
{
    const char next = *p;
    if (next == ']') {
        p++;
        return ret(RIGHTPAR2, 0, 1);
    }
    if (isOpChar(next)) {
        return opfirstchars();
    }
    return ret(BAR2, 1, 1);
}

// Handler for '"': scans a string literal up to the closing quote and returns
// it as STRING2. If the text ends first, everything up to the end of the text
// is returned as OTHER2.
//
// A backslash escapes the character that follows it, whatever it is. Only
// \" used to be recognised, so in "\\" the second backslash was taken to
// escape the closing quote and the string swallowed the text after it.
int doublequote()
{
    t = *p++;
    while (t != '"' && t != '\0') {
        if (t == '\\' && *p != '\0') {
            // Skip the escaped character; the terminator is never skipped.
            p++;
        }
        t = *p++;
    }
    if (t == '\0') {
        // Step back onto the terminator so the next zzlex() sees it.
        p--;
        return ret(OTHER2, 1, 0);
    }
    return ret(STRING2, 1, 1);
}

// Handler for '\': returns OTHER2, taking a directly following '"' into the
// same token.
int backslash()
{
    const bool quoteFollows = (*p == '"');
    if (quoteFollows) {
        p++;
    }
    return ret(OTHER2, 1, 1);
}

// Handler for ';': ";;" is returned as ENDMARK2, a single ';' as SEP2.
int semicolon()
{
    if (*p != ';') {
        return ret(SEP2, 0, 1);
    }
    p++;
    return ret(ENDMARK2, 0, 0);
}

// Handler for ':': "::" and ":=" are returned as OTHER2, a single ':' as
// COLON2.
int colon()
{
    switch (*p) {
    case ':':
    case '=':
        p++;
        return ret(OTHER2, 1, 1);
    default:
        return ret(COLON2, 0, 1);
    }
}

// Handler for '-': "->" not followed by another operator character is
// returned as RIGHTARROW2; everything else is scanned as an operator by
// opfirstchars().
int minussign()
{
    // The character after '>' is only inspected when '>' is really there.
    const bool isArrow = (*p == '>') && !isOpChar(*(p + 1));
    if (!isArrow) {
        return opfirstchars();
    }
    p++;
    return ret(RIGHTARROW2, 1, 1);
}

// Handler for '_': lets alpha() scan the identifier characters that follow.
// If alpha() consumed at least one character its token code is returned;
// a '_' standing alone is returned as OTHER2.
int underscore()
{
    char* const start = p;
    const int token = alpha();
    if (p == start) {
        return ret(OTHER2, 1, 1);
    }
    return ret(token, 1, 1);
}

// Handler for '#': the single character is returned as IGNORE2.
int sharp()
{
    const int token = IGNORE2;
    return ret(token, 1, 0);
}

// Handler for ',': the single character is returned as OTHER2.
int comma()
{
    const int token = OTHER2;
    return ret(token, 0, 1);
}

// Handler for '.': ".(" is returned as DOTPAR2, a single '.' as OTHER2.
int dotpar()
{
    if (*p != '(') {
        return ret(OTHER2, 0, 0);
    }
    p++;
    return ret(DOTPAR2, 0, 0);
}

// Handler for the quote characters ' and `. On entry t holds the quote that
// was read, so a literal must be closed by the same character.
//
// Recognised forms (all returned as OTHER2):
//   - backslash + three digits + closing quote
//   - backslash + 'x' + two hexadecimal digits + closing quote
//   - backslash + one of n t b r \ ' ` + closing quote
//   - the two characters "xx" + closing quote
//   - any single character + closing quote
// When none of these match, only the characters consumed so far (the quote,
// and the backslash if there was one) make up the token.
static int charliteral()
{
    if (*p == '\\') {
        p++;
        if (is_digit(*p) && is_digit(*(p + 1)) && is_digit(*(p + 2)) && *(p + 3) == t) {
            p += 4;
        }
        // isxdigit() is only defined for unsigned char values (or EOF), so the
        // possibly negative plain char is converted first.
        else if (*p == 'x' && isxdigit((unsigned char)*(p + 1)) && isxdigit((unsigned char)*(p + 2)) && *(p + 3) == t) {
            p += 4;
        }
        else if ((*p == 'n' || *p == 't' || *p == 'b' || *p == 'r' || *p == '\\' || *p == '\'' || *p == '`') && *(p + 1) == t) {
            p += 2;
        }
        return ret(OTHER2, 1, 1);
    }
    if (*p == 'x' && *(p + 1) == 'x' && *(p + 2) == t ) {
        p += 3;
        return ret(OTHER2, 1, 1);
    }
    // Quote at the very end of the text: nothing more to look at.
    if (*p == '\0') {
        return ret(OTHER2,0 , 0);
    }
    if (*(p + 1) == t) {
        p += 2;
        return ret(OTHER2, 1, 1);
    }
    return ret(OTHER2, 1, 1);
}

// Handler for '!': the single character is returned as OTHER2.
int exclam()
{
    const int token = OTHER2;
    return ret(token, 1, 0);
}

// Handler for '<': "<-" not followed by another operator character is
// returned as OTHER2; everything else is scanned as an operator by
// opfirstchars().
int smaller()
{
    // The character after '-' is only inspected when '-' is really there.
    const bool isAssign = (*p == '-') && !isOpChar(*(p + 1));
    if (!isAssign) {
        return opfirstchars();
    }
    p++;
    return ret(OTHER2, 1, 1);
}

// Handler for '=': "==" not followed by another operator character is
// returned as EQUALEQUAL2; otherwise the single '=' is returned as EQUAL2.
int equalsign()
{
    // The character after the second '=' is only inspected when it is there.
    const bool isDouble = (*p == '=') && !isOpChar(*(p + 1));
    if (!isDouble) {
        return ret(EQUAL2, 1, 1);
    }
    p++;
    return ret(EQUALEQUAL2, 1, 1);
}

// Handler for '0': "0x" / "0X" followed by any number of hexadecimal digits
// is returned as OTHER2; any other literal starting with '0' is scanned by
// num().
int hexadecimal()
{
    if (*p == 'x' || *p == 'X') {
        p++;
        // isxdigit() is only defined for unsigned char values (or EOF), so the
        // possibly negative plain char is converted first.
        while (isxdigit((unsigned char)*p)) {
            p++;
        }
        return ret(OTHER2, 1, 1);
    }
    return num();
}

// Scans the rest of a decimal literal: digits, an optional '.' followed by
// digits, and an optional exponent ('e' or 'E', optional sign, digits).
// The literal is returned as OTHER2.
int num()
{
    // Integer part.
    for (; is_digit(*p); p++) {
    }

    // Optional fractional part.
    if (*p == '.') {
        for (p++; is_digit(*p); p++) {
        }
    }

    // Optional exponent.
    if (*p == 'e' || *p == 'E') {
        p++;
        if (*p == '+' || *p == '-') {
            p++;
        }
        for (; is_digit(*p); p++) {
        }
    }

    return ret(OTHER2, 1, 1);
}

// Extends the current token over the operator characters that follow and
// returns it as OTHER2. Scanning stops at the first character that is not an
// operator character, before an operator character directly followed by ')',
// and before ";;".
int opfirstchars()
{
    char c = *p;
    while (c != '\0' && isOpChar(c)) {
        // c is not the terminator here, so p + 1 is still inside the buffer.
        // Reading it unconditionally used to look one byte past the buffer
        // when the operator ran up to the end of the text.
        const char next = *(p + 1);
        if (next == ')' || (c == ';' && next == ';')) {
            break;
        }
        c = *++p;
    }
    return ret(OTHER2, 1, 1);
}

// Scans an identifier whose first character is in t: letters, digits, '_' and
// '\'' are consumed. Returns the keyword's token code if the text is listed in
// keywords[], IDEN2 otherwise.
//
// Only the first sizeof(buf) - 1 characters are copied into buf for the
// keyword lookup, but the whole identifier is always consumed. Scanning used
// to stop as soon as buf was full, which split a very long identifier into
// several tokens.
int alpha()
{
    const int limit = (int)sizeof(buf) - 1;
    int i = 0;
    buf[i++] = t;
    while (isAlNum((unsigned char)*p) || *p == '_' || *p == '\'') {
        t = *p++;
        if (i < limit) {
            buf[i++] = t;
        }
    }
    buf[i] = 0;
    // A truncated name is far longer than any keyword, so the lookup is
    // unaffected by the truncation.
    int kv = keyvalue(buf);
    if (kv != 0) {
        return ret(kv, 1, 1);
    }
    return ret(IDEN2, 1, 1);
}

//------------------------------------------------------------------------------------------------------------------------------------

// Entry points of the scanner. struct yy_buffer_state is never defined in
// this section and zz_scan_string() always returns NULL.
typedef struct yy_buffer_state *BUFFER_STATE;
BUFFER_STATE zz_scan_string(const char* s);
int zzlex();

// Prepares the scanner to read the NUL-terminated text s.
//
// The dispatch table is built on the first call. The text is copied into a
// freshly allocated buffer (str), which ret() frees when the scan reaches the
// end of the text. All scanner state is reset. Always returns NULL.
//
// NOTE(review): the result of malloc() is not checked, and a buffer left over
// from a scan that was not run to completion is not released here.
BUFFER_STATE zz_scan_string(const char* s)
{
    if (init) {
        inittab();
        init = false;
    }

    // Copy the text together with its terminating NUL.
    const size_t length = strlen(s);
    str = (char*)malloc(length + 1);
    memcpy(str, s, length + 1);
    sz = length;

    p = str;
    d = 0;
    newline = 0;
    tokenbegin = 0;
    tokenend = 0;
    zzleng = 0;
    token_length = 0;
    return NULL;
}

// Returns the next token of the text installed by zz_scan_string().
//
// Leading white space is skipped, the first byte of the token is read into t
// and the handler registered for it in tab[] scans the rest. Bytes with no
// handler are passed to ret() as OTHER2; for the terminating NUL this moves
// the cursor past the end of the text, so ret() returns 0.
int zzlex()
{
    whitespace();
    tokenbegin = p - str;
    t = (int)(unsigned char)*p++;

    int (*handler)() = tab[t];
    if (handler == NULL) {
        return ret(OTHER2, 1, 1);
    }
    return handler();
}

