You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
508 lines
23 KiB
508 lines
23 KiB
#include "JSParser.h" |
|
#include "../../../StringUtils.h" |
|
|
|
// Splits JavaScript source text into coarse, statement-level tokens.
//
// A token ends at a newline, a ';', a ',' (except inside a `var` statement) or at
// the '}' that closes a function, but only while the scanner is in plain code or
// in a `var` statement and no '{' is open. Tokens shorter than three characters
// are dropped. The delimiter is not part of the token, except that a function
// token keeps its closing '}'.
//
// Limitations that are kept as they were:
//  - quotes and comments are only recognised from plain code, not inside
//    braces, parentheses, a function or a `var` statement;
//  - when a comment ends, the pending token restarts after it, so text that
//    precedes the comment in the same token is discarded;
//  - text left over at the end of the input is only emitted when the scanner is
//    NOT in plain code (e.g. an unterminated `var` statement).
//
// @param source JavaScript source text
// @return the tokens in source order
std::vector<std::string> JSParser::getTokens(const std::string &source) const {
    // Scanner states; the numeric values are the state numbers this function has always used.
    enum State : unsigned char {
        kCode = 0,         // plain code
        kBraces = 1,       // inside { } opened from plain code
        kLineComment = 2,  // after "//" up to the newline
        kBlockComment = 3, // between "/*" and "*/"
        kSingleQuote = 4,
        kDoubleQuote = 5,
        kFunction = 6,     // from the `function` keyword to the '}' that brings the scope depth to 0
        kVar = 7,          // inside a `var` statement
        kParens = 8        // inside ( ) opened from plain code
    };
    const size_t kMinTokenLength = 3;

    const auto isIdentifierChar = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
            || c == '_' || c == '$';
    };

    const size_t length = source.length();
    std::vector<std::string> tokens;
    State state = kCode;
    size_t tokenStart = 0; // index of the first character of the pending token
    size_t scopeLevel = 0; // depth of open '{' outside quotes and comments
    size_t braceLevel = 0; // depth inside kBraces
    size_t parenLevel = 0; // depth inside kParens

    for (size_t cursor = 0; cursor < length; ++cursor) {
        const char c = source[cursor];
        const char next = cursor + 1 < length ? source[cursor + 1] : '\0';

        switch (state) {
        case kCode: {
            // Keywords only count when they do not continue a longer identifier ("myvar ", "dysfunction").
            const bool atWordStart = cursor == 0 || !isIdentifierChar(source[cursor - 1]);
            if (c == '{') {
                state = kBraces;
                ++braceLevel;
            } else if (c == '(') {
                state = kParens;
                ++parenLevel;
            } else if (c == '\'') { // quotes just for allowing [;{}\n] in quotes
                state = kSingleQuote;
                continue;
            } else if (c == '"') {
                state = kDoubleQuote;
                continue;
            } else if (c == '/' && next == '/') {
                state = kLineComment;
                continue;
            } else if (c == '/' && next == '*') {
                state = kBlockComment;
                ++cursor; // consume the '*' so that "/*/" is not taken for a complete comment
                continue;
            } else if (atWordStart && source.compare(cursor, 4, "var ") == 0) {
                state = kVar;
            } else if (atWordStart && length > cursor + 8 && source.compare(cursor, 8, "function") == 0
                       && !isIdentifierChar(source[cursor + 8])) {
                state = kFunction;
            }
            break;
        }
        case kBraces:
            if (c == '{') {
                ++braceLevel;
            } else if (c == '}' && --braceLevel == 0) {
                state = kCode;
            }
            break;
        case kParens:
            if (c == '(') {
                ++parenLevel;
            } else if (c == ')' && --parenLevel == 0) {
                state = kCode;
            }
            break;
        case kLineComment:
            if (c == '\n') {
                // Restart the pending token after the comment. We must not fall through to the
                // delimiter handling below: the pending token is empty at this point.
                tokenStart = cursor + 1;
                state = kCode;
            }
            continue; // nothing inside a comment affects scopes or tokens
        case kBlockComment:
            if (c == '*' && next == '/') {
                ++cursor; // consume the '/' so it does not leak into the next token
                tokenStart = cursor + 1;
                state = kCode;
            }
            continue;
        case kSingleQuote:
            // cursor >= 1 here because the opening quote precedes it
            if (c == '\'' && source[cursor - 1] != '\\') {
                state = kCode;
            }
            continue; // braces and delimiters inside quotes are ignored
        case kDoubleQuote:
            if (c == '"' && source[cursor - 1] != '\\') {
                state = kCode;
            }
            continue;
        case kFunction:
        case kVar:
            break; // only the scope tracking below applies
        }

        // Track brace depth; a function ends with the '}' that closes its outermost scope.
        bool functionEnded = false;
        if (c == '{') {
            ++scopeLevel;
        } else if (c == '}') {
            if (scopeLevel > 0) { // a stray '}' must not wrap the unsigned counter
                --scopeLevel;
            }
            if (state == kFunction && scopeLevel == 0) {
                state = kCode;
                functionEnded = true;
            }
        }

        if ((state == kCode || state == kVar) && scopeLevel == 0) {
            // FIXME: ; in for loops
            const bool atDelimiter = c == '\n' || c == ';' || functionEnded || (c == ',' && state != kVar);
            if (atDelimiter) {
                std::string token = source.substr(tokenStart, cursor - tokenStart);
                if (c == '}') {
                    token += '}';
                }
                if (token.length() >= kMinTokenLength) {
                    tokens.push_back(token);
                }
                tokenStart = cursor + 1;
                if (state == kVar) { // allow var constructs to end normally and take us out of var construct
                    state = kCode;
                }
            }
        }
    }

    // In plain code the remainder is not emitted (behaviour kept from the earlier revision).
    if (state == kCode) {
        return tokens;
    }
    // Otherwise we ran out of characters inside a construct; keep what was collected.
    const std::string tail = source.substr(tokenStart);
    if (tail.length() >= kMinTokenLength) {
        tokens.push_back(tail);
    }
    return tokens;
}
|
|
|
// Breaks an expression into operands and operators for the debug dump in doAssignment().
//
// Recognised operators: = || && < > <= >= == === != !== + - * / % << >> and the
// ternary '?' ... ':'. Quoted strings are emitted without their quotes, and the
// text between a top-level '(' and its matching ')' is emitted as one part
// surrounded by "(" and ")". Operands are emitted verbatim (not trimmed);
// operands that are empty or only whitespace are skipped.
//
// Quotes are not tracked inside parentheses or inside a ternary branch.
static std::vector<std::string> splitExpressionParts(const std::string &expr) {
    // Longest operators first, so "===" is not reported as "==" followed by "=".
    static const std::string kOperators[] = {
        "===", "!==", "==", "!=", "<=", ">=", "<<", ">>", "||", "&&",
        "=", "<", ">", "+", "-", "*", "/", "%"
    };
    enum Mode { kNormal, kSingleQuote, kDoubleQuote, kParen, kTernary };

    std::vector<std::string> parts;
    Mode mode = kNormal;
    size_t partStart = 0;    // first character of the operand being collected
    size_t parenLevel = 0;
    size_t ternaryLevel = 0;

    // Emits the operand in [partStart, end) unless it is blank.
    const auto flushOperand = [&](size_t end) {
        if (end <= partStart) {
            return;
        }
        const std::string operand = expr.substr(partStart, end - partStart);
        if (operand.find_first_not_of(" \t\r\n") != std::string::npos) {
            parts.push_back(operand);
        }
    };

    for (size_t cursor = 0; cursor < expr.length(); ++cursor) {
        const char c = expr[cursor];
        switch (mode) {
        case kNormal:
            if (c == '\'' || c == '"') {
                flushOperand(cursor);
                partStart = cursor + 1; // quote content starts after the opening quote
                mode = (c == '\'') ? kSingleQuote : kDoubleQuote;
            } else if (c == '(') {
                flushOperand(cursor);
                parts.push_back("(");
                partStart = cursor + 1;
                parenLevel = 1;
                mode = kParen;
            } else if (c == '?') {
                flushOperand(cursor);
                parts.push_back("?");
                partStart = cursor + 1;
                ternaryLevel = 1;
                mode = kTernary;
            } else {
                for (const std::string &op : kOperators) {
                    if (expr.compare(cursor, op.length(), op) == 0) {
                        flushOperand(cursor);
                        parts.push_back(op);
                        cursor += op.length() - 1; // consume the whole operator
                        partStart = cursor + 1;
                        break;
                    }
                }
            }
            break;
        case kSingleQuote:
        case kDoubleQuote: {
            const char closing = (mode == kSingleQuote) ? '\'' : '"';
            // cursor >= 1 here because the opening quote precedes it
            if (c == closing && expr[cursor - 1] != '\\') {
                parts.push_back(expr.substr(partStart, cursor - partStart));
                partStart = cursor + 1;
                mode = kNormal;
            }
            break;
        }
        case kParen:
            if (c == '(') {
                ++parenLevel;
            } else if (c == ')' && --parenLevel == 0) {
                flushOperand(cursor);
                parts.push_back(")");
                partStart = cursor + 1;
                mode = kNormal;
            }
            break;
        case kTernary:
            if (c == '?') {
                ++ternaryLevel;
            } else if (c == ':' && --ternaryLevel == 0) {
                flushOperand(cursor);
                parts.push_back(":");
                partStart = cursor + 1;
                mode = kNormal;
            }
            break;
        }
    }
    flushOperand(expr.length()); // whatever follows the last operator
    return parts;
}

// Returns the index of the first '=' that is an assignment operator, i.e. not part
// of ==, ===, !=, !==, <= or >=, or std::string::npos when there is none.
// FIXME: does not skip '=' inside quotes or JSON.
static size_t findAssignmentOperator(const std::string &token) {
    for (size_t i = 0; i < token.length(); ++i) {
        if (token[i] != '=') {
            continue;
        }
        if (i + 1 < token.length() && token[i + 1] == '=') {
            // "==" or "===": skip the whole run of '='
            while (i + 1 < token.length() && token[i + 1] == '=') {
                ++i;
            }
            continue;
        }
        const char previous = i > 0 ? token[i - 1] : '\0';
        if (previous == '!' || previous == '<' || previous == '>') {
            continue;
        }
        return i;
    }
    return std::string::npos;
}

// Applies one assignment or declaration to rootScope.variables.
//
//  - "key = value": stores the trimmed text right of the first assignment '='
//    under the trimmed text left of it. The value is stored as text and may
//    itself contain '=' or comparisons.
//  - a bare name (no '='): makes sure the variable exists, with "" as its value;
//    an existing value is left alone.
//  - a comparison without any assignment: reported as not implemented.
//
// The token is also dumped to std::cout, split into expression parts, for debugging.
//
// @param rootScope scope whose `variables` map is updated
// @param token     one statement, or one item of a `var` list
// @return true when a variable was stored or already existed; false for a
//         malformed assignment, an empty name or an unimplemented comparison
bool doAssignment(js_scope &rootScope, std::string token) {
    // document.documentElement.classList?($.hasClass=function(e,t){return e.classList.contains(t)},$.addClass=function(e,t){e.classList.add(t)},$.removeClass=function(e,t){e.classList.remove(t)}):($.hasClass=function(e,t){return-1!=(" "+e.className+" ").indexOf(" "+t+" ")},$.addClass=function(e,t){e.className=""===e.className?t:e.className+" "+t},$.removeClass=function(e,t){e.className=(" "+e.className+" ").replace(" "+t+" ","")})
    const std::vector<std::string> expression_parts = splitExpressionParts(token);
    std::cout << "expression token[" << token << "]" << std::endl;
    std::cout << "expression debug" << std::endl;
    for (const std::string &part : expression_parts) {
        std::cout << "[" << part << "]" << std::endl;
    }
    std::cout << "expression end" << std::endl;

    const size_t assignPos = findAssignmentOperator(token);

    if (assignPos == std::string::npos) {
        if (token.find('=') != std::string::npos) {
            // Every '=' belongs to a comparison: there is nothing to assign, and the
            // operands must not be touched.
            if (token.find("===") != std::string::npos || token.find("!==") != std::string::npos) {
                std::cout << "JSParser:::doAssignment - strict compare not implemented" << std::endl;
            } else {
                std::cout << "JSParser:::doAssignment - compare not implemented" << std::endl;
            }
            return false;
        }
        // var bob; just make sure the variable exists
        auto name = trim(token);
        if (name.empty()) {
            return false;
        }
        if (rootScope.variables.find(name) == rootScope.variables.end()) {
            rootScope.variables[name] = "";
        }
        return true;
    }

    // Split at the assignment operator only, so a value containing '=' stays whole.
    std::string rawKey = token.substr(0, assignPos);
    std::string rawValue = token.substr(assignPos + 1);
    // FIXME: dot notation in keys
    auto key = trim(rawKey);
    // FIXME: is value a lambda
    auto value = trim(rawValue);
    if (key.empty() || value.empty()) {
        std::cout << "JSParser:::doAssignment - bad var parse " << token << std::endl;
        return false;
    }
    rootScope.variables[key] = value;
    return true;
}
|
|
|
// extract scopes & scope.variables |
|
// build exeecution tree |
|
std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const { |
|
std::shared_ptr<JavaScript> script = std::make_shared<JavaScript>(); |
|
script->tokens = this->getTokens(source); |
|
|
|
// we need to at least build the root scope |
|
//std::cout << "\nstart script" << std::endl; |
|
for(auto it : script->tokens) { |
|
//std::cout << "parse token[" << it << "]" << std::endl; |
|
if (it.substr(0, 2)=="if") { |
|
auto ifStr = it.substr(2); |
|
// find ( |
|
auto end = ifStr.find('('); |
|
ifStr = ifStr.substr(0, end); |
|
// find ) |
|
end = ifStr.find(')'); |
|
auto ifCondition = ifStr.substr(0, end); |
|
//std::cout << "ifCondition[" << ifCondition << "]" << std::endl; |
|
ifStr = ifStr.substr(0, end); |
|
// do we have a block start? |
|
// find { (block start) |
|
end = ifStr.find('{'); |
|
// if not block start |
|
// else |
|
end = ifStr.find('{'); |
|
std::cout << "if not implemented" << std::endl; |
|
} else if (it.substr(0, 3)=="var") { |
|
auto listStr = it.substr(3); |
|
// FIXME: , in quotes or {} (JSON) <= top priority for 4chan |
|
std::vector<std::string> opens, closes; |
|
opens.push_back("{"); |
|
opens.push_back("'"); |
|
opens.push_back("\""); |
|
closes.push_back("}"); |
|
closes.push_back("'"); |
|
closes.push_back("\""); |
|
auto varList = parseSepButNotBetween(listStr, ",", opens, closes); |
|
//std::cout << "has " << varList.size() << " variables" << std::endl; |
|
for(auto it2 : varList) { |
|
/* |
|
// FIXME: make sure = isn't in quotes or JSON? |
|
// FIXME: double or triple equal differentiation |
|
//std::cout << "looking at [" << it2 << "]" << std::endl; |
|
auto hasTripleEqual = it2.find("==="); |
|
auto hasDoubleEqual = std::string::npos; |
|
auto hasSingleEqual = std::string::npos; |
|
if (hasTripleEqual == std::string::npos) { |
|
hasDoubleEqual = it2.find("=="); |
|
} else { |
|
// process expression |
|
std::cout << "var strict compare not implemented" << std::endl; |
|
} |
|
if (hasDoubleEqual == std::string::npos) { |
|
hasSingleEqual = it2.find("="); |
|
} else { |
|
// process expression |
|
std::cout << "var compare not implemented" << std::endl; |
|
} |
|
if (hasSingleEqual != std::string::npos) { |
|
auto keyValue = split(it2, '='); |
|
if (keyValue.size() < 2) { |
|
std::cout << "bad var parse " << keyValue[0] << std::endl; |
|
continue; |
|
} |
|
// FIXME: dot notation in keys |
|
auto key = trim(keyValue[0]); |
|
auto value = trim(keyValue[1]); |
|
//std::cout << "[" << key << "=" << value <<s"]" << std::endl; |
|
script->rootScope.variables[key] = value; |
|
} else { |
|
// var bob; just make sure the variable exists |
|
script->rootScope.variables[it2] = ""; |
|
} |
|
*/ |
|
doAssignment(script->rootScope, it2); |
|
} |
|
} else if (it.substr(0, 9)=="function ") { |
|
auto defStr = it.substr(9); |
|
// find ( (name end, prototype start) |
|
auto end = defStr.find('('); |
|
auto funcName = defStr.substr(0, end); |
|
defStr = defStr.substr(end + 1); // next char after ( |
|
// find ) (prototype end) |
|
end = defStr.find(')'); |
|
auto prototype = defStr.substr(0, end); |
|
defStr = defStr.substr(end + 1); // next char after ) |
|
// find { (func start) |
|
end = defStr.find('{'); |
|
defStr = defStr.substr(end + 1, defStr.size() - 2); // from { to the end |
|
auto funcTokens = this->getTokens(defStr); |
|
//std::cout << "function [" << funcName << "] prototype [" << prototype << "] has [" << funcTokens.size() << "] tokens" << std::endl; |
|
// __netrunner_function_definition is 31 chars |
|
script->rootScope.variables[funcName] = "__netrunner_function_definition = { prototype: \"" + prototype + "\", code: \"" + defStr + "\" }"; |
|
js_function newFunc; |
|
newFunc.tokens = funcTokens; |
|
newFunc.parent = &script->rootScope; // is this going to a problem? |
|
} else if (it.substr(0, 6)=="return") { |
|
// js expression here |
|
// probably don't need to do anything here atm |
|
std::cout << "return not implemented" << std::endl; |
|
} else if (it.find("=") != std::string::npos) { |
|
// has = so it's an expression |
|
//std::cout << "assignment[" << it << "]" << std::endl; |
|
//std::cout << "assignment not implemented" << std::endl; |
|
doAssignment(script->rootScope, it); |
|
} else if (it.find("(") != std::string::npos && it.find(")") != std::string::npos) { |
|
// has () so it's a function call |
|
//std::cout << "funcCall[" << it << "]" << std::endl; |
|
// we need to start passed any && or || |
|
// need to parse any expression before the function call... |
|
if (it.find("&&") == std::string::npos && it.find("||") == std::string::npos) { |
|
// figure out function name |
|
auto parenStart = it.find("("); |
|
auto funcName = it.substr(0, parenStart); |
|
//std::cout << "I think the function name is [" << funcName << "]" << std::endl; |
|
if (script->rootScope.variables.find(funcName) == script->rootScope.variables.end()) { |
|
std::cout << "Function [" << funcName << "] d.n.e" << std::endl; |
|
} else { |
|
// make sure function is parsed |
|
// and step through tokens |
|
auto arguments = it.substr(parenStart + 1, it.find(")") - parenStart - 1); |
|
std::cout << "functionCall[" << funcName << "](" << arguments << ") not implemented" << std::endl; |
|
//std::cout << "parameters[" << arguments << "]" << std::endl; |
|
} |
|
} else { |
|
std::cout << "expression before functionCall not implemented" << std::endl; |
|
} |
|
//std::cout << "functionCall not implemented" << std::endl; |
|
} else { |
|
std::cout << "unknown_type[" << it << "]" << std::endl; |
|
} |
|
} |
|
//std::cout << "end script" << "\n" << std::endl; |
|
return script; |
|
} |
|
|
|
std::shared_ptr<JavaScript> JSParser::append(std::shared_ptr<JavaScript> &destination, const std::shared_ptr<JavaScript> &source) const { |
|
for(auto it : source->tokens) { |
|
destination->tokens.push_back(it); |
|
} |
|
// merge scopes (instead of reparsing) |
|
for(auto it : source->rootScope.variables) { |
|
destination->rootScope.variables[it.first] = it.second; |
|
} |
|
return destination; |
|
}
|
|
|