You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

508 lines
23 KiB

#include "JSParser.h"
#include "../../../StringUtils.h"
/**
 * Splits JavaScript source text into coarse, statement-level tokens.
 *
 * The scanner walks the source one character at a time and keeps a small
 * state (plain code, brace literal, parentheses, comment, string, function
 * definition, var declaration). While it is in plain code or in a var
 * declaration and the brace depth is zero, a newline, a ';', a ',' (outside a
 * var declaration) or the brace that closes a function definition ends the
 * current token.
 *
 * Tokens shorter than three characters are discarded. When the input ends
 * while the scanner is still in a non-default state, the text collected so far
 * is emitted as a final token; when it ends in the default state the
 * unterminated tail is dropped (unchanged from the previous behaviour).
 *
 * @param source JavaScript source text.
 * @return The tokens in source order.
 */
std::vector<std::string> JSParser::getTokens(const std::string &source) const {
    // Numeric values are the ones the scanner has always used.
    enum State : unsigned char {
        kCode = 0,         // plain top-level code
        kJson = 1,         // inside { ... } opened from plain code
        kLineComment = 2,  // inside // ...
        kBlockComment = 3, // inside /* ... */
        kSingleQuote = 4,  // inside '...'
        kDoubleQuote = 5,  // inside "..."
        kFunction = 6,     // after the word "function", until its braces balance
        kVar = 7,          // after "var ", until the declaration ends
        kParen = 8         // inside ( ... ) opened from plain code
    };

    std::vector<std::string> tokens;
    const size_t length = source.length();

    State state = kCode;
    size_t tokenStart = 0; // index of the first character of the pending token
    size_t scopeLevel = 0; // depth of { } seen outside comments and strings
    size_t jsonLevel = 0;
    size_t parenLevel = 0;

    // Text of the pending token up to (not including) `end`. Returns an empty
    // string instead of letting `end - tokenStart` wrap around when the token
    // start has already moved past `end` (e.g. right after a comment).
    const auto pendingText = [&source, &tokenStart](size_t end) {
        return end > tokenStart ? source.substr(tokenStart, end - tokenStart) : std::string();
    };

    for (size_t cursor = 0; cursor < length; cursor++) {
        const char c = source[cursor];
        const bool hasNext = cursor + 1 < length;

        switch (state) {
        case kCode:
            if (c == '{') {
                state = kJson;
                jsonLevel++;
            } else if (c == '(') {
                state = kParen; // in a function call or prototype
                parenLevel++;
            } else if (c == '\'') { // quotes just for allowing [;{}\n] in quotes
                state = kSingleQuote;
            } else if (c == '"') {
                state = kDoubleQuote;
            } else if (c == '/' && hasNext && source[cursor + 1] == '/') {
                state = kLineComment;
            } else if (c == '/' && hasNext && source[cursor + 1] == '*') {
                state = kBlockComment;
            } else if (source.compare(cursor, 4, "var ") == 0) {
                state = kVar;
            } else if (length > cursor + 8 && source.compare(cursor, 8, "function") == 0) {
                state = kFunction;
            }
            break;
        case kJson:
            if (c == '{') {
                jsonLevel++;
            } else if (c == '}') {
                jsonLevel--;
                if (!jsonLevel) {
                    state = kCode;
                }
            }
            break;
        case kParen:
            if (c == '(') {
                parenLevel++;
            } else if (c == ')') {
                parenLevel--;
                if (!parenLevel) {
                    state = kCode;
                }
            }
            break;
        case kLineComment:
            if (c == '\n') {
                // the comment (and anything pending before it) is not part of a token
                tokenStart = cursor + 1;
                state = kCode;
            }
            break;
        case kBlockComment:
            if (c == '*' && hasNext && source[cursor + 1] == '/') {
                // consume the closing '/' as well so the next token does not start with it
                ++cursor;
                tokenStart = cursor + 1;
                state = kCode;
                continue;
            }
            break;
        case kSingleQuote:
            // cursor >= 1 here: the opening quote was seen on an earlier iteration
            if (c == '\'' && source[cursor - 1] != '\\') {
                state = kCode;
            }
            break;
        case kDoubleQuote:
            if (c == '"' && source[cursor - 1] != '\\') {
                state = kCode;
            }
            break;
        case kFunction:
        case kVar:
            // no per-character transitions; these end through the brace depth
            // or separator handling below
            break;
        }

        // Brace depth. Braces inside comments and string literals are text, not scopes.
        const bool inCommentOrString = state >= kLineComment && state <= kDoubleQuote;
        bool endIt = false;
        if (!inCommentOrString) {
            if (c == '{') {
                scopeLevel++;
            } else if (c == '}' && scopeLevel > 0) {
                // a stray '}' at depth zero is ignored so the unsigned counter cannot wrap
                --scopeLevel;
                if (state == kFunction && scopeLevel == 0) {
                    state = kCode;
                    endIt = true;
                }
            }
        }

        // Only plain code and var declarations at depth zero can end a token.
        if ((state == kCode || state == kVar) && !scopeLevel) {
            const bool separator = c == '\n' || c == ';' || endIt || (c == ',' && state != kVar);
            if (separator) {
                // FIXME: ; in for loops
                std::string token = pendingText(cursor);
                if (c == '}') {
                    // keep the brace that closed the function definition
                    token += '}';
                }
                if (token.length() >= 3) {
                    tokens.push_back(token);
                }
                tokenStart = cursor + 1;
                // a var declaration ends here and returns the scanner to plain code
                state = kCode;
            }
        }
    }

    if (state == kCode) {
        return tokens;
    }
    // ran out of characters in the middle of a construct: emit what was collected
    const std::string token = pendingText(length);
    if (token.length() >= 3) {
        tokens.push_back(token);
    }
    return tokens;
}
// Breaks an expression into operands and operators. The result is only used
// for the debug trace printed by doAssignment.
//
// Quoted strings are emitted as one part without their quotes, the content of
// a top-level parenthesised group is emitted as one part between "(" and ")",
// and the middle of a ternary is emitted as one part between "?" and ":".
static std::vector<std::string> splitExpressionParts(const std::string &token) {
    // Longest operators first so "===" is not read as "==" followed by "=".
    static const std::string operators[] = {
        "===", "!==", "==", "!=", "<=", ">=", "<<", ">>", "||", "&&",
        "=", "<", ">", "+", "-", "*", "/", "%"
    };

    std::vector<std::string> parts;
    size_t operandStart = 0; // first character of the operand being collected
    size_t quoteStart = 0;
    size_t parenLevel = 0;
    size_t trinaryLevel = 0;
    // 0 = expression, 4 = single quote, 5 = double quote, 8 = parentheses, 9 = ternary
    unsigned char state = 0;

    for (size_t cursor = 0; cursor < token.length(); cursor++) {
        const char c = token[cursor];
        if (state == 0) {
            if (c == '\'' || c == '"') {
                quoteStart = cursor;
                state = (c == '\'') ? 4 : 5;
            } else if (c == '(') {
                parts.push_back(token.substr(operandStart, cursor - operandStart));
                parts.push_back("(");
                operandStart = cursor + 1;
                parenLevel = 1;
                state = 8;
            } else if (c == '?') {
                parts.push_back(token.substr(operandStart, cursor - operandStart));
                parts.push_back("?");
                operandStart = cursor + 1;
                trinaryLevel = 1;
                state = 9;
            } else {
                for (const std::string &op : operators) {
                    if (token.compare(cursor, op.length(), op) != 0) {
                        continue;
                    }
                    parts.push_back(token.substr(operandStart, cursor - operandStart));
                    parts.push_back(op);
                    // step over the whole operator so its tail is not matched again
                    cursor += op.length() - 1;
                    operandStart = cursor + 1;
                    break;
                }
            }
        } else if (state == 4 || state == 5) {
            const char closing = (state == 4) ? '\'' : '"';
            // cursor >= 1 here: the opening quote was seen on an earlier iteration
            if (c == closing && token[cursor - 1] != '\\') {
                parts.push_back(token.substr(quoteStart + 1, cursor - quoteStart - 1));
                operandStart = cursor + 1;
                state = 0;
            }
        } else if (state == 8) {
            if (c == '(') {
                parenLevel++;
            } else if (c == ')') {
                parenLevel--;
                if (!parenLevel) {
                    parts.push_back(token.substr(operandStart, cursor - operandStart));
                    parts.push_back(")");
                    operandStart = cursor + 1;
                    state = 0;
                }
            }
        } else if (state == 9) {
            if (c == '?') {
                trinaryLevel++;
            } else if (c == ':') {
                trinaryLevel--;
                if (!trinaryLevel) {
                    parts.push_back(token.substr(operandStart, cursor - operandStart));
                    parts.push_back(":");
                    operandStart = cursor + 1;
                    state = 0;
                }
            }
        }
    }
    // whatever follows the last operator is the final operand
    if (operandStart < token.length()) {
        parts.push_back(token.substr(operandStart));
    }
    return parts;
}

// Returns the index of the first '=' that is an assignment operator, or
// std::string::npos when there is none. A '=' inside a quoted string, or one
// that belongs to ==, ===, !=, !==, <= or >=, is skipped.
static size_t findAssignmentOperator(const std::string &token) {
    char quote = 0; // the quote character we are inside of, 0 when outside
    for (size_t i = 0; i < token.length(); ++i) {
        const char c = token[i];
        if (quote) {
            if (c == quote && token[i - 1] != '\\') {
                quote = 0;
            }
            continue;
        }
        if (c == '\'' || c == '"') {
            quote = c;
            continue;
        }
        if (c != '=') {
            continue;
        }
        if (i + 1 < token.length() && token[i + 1] == '=') {
            // == or ===: step over the whole run
            while (i + 1 < token.length() && token[i + 1] == '=') {
                ++i;
            }
            continue;
        }
        if (i > 0 && (token[i - 1] == '!' || token[i - 1] == '<' || token[i - 1] == '>')) {
            continue;
        }
        return i;
    }
    return std::string::npos;
}

/**
 * Applies one declaration or assignment to a scope.
 *
 * Prints a debug breakdown of the expression to std::cout, then:
 *  - "name = value": stores the trimmed value under the trimmed name, where
 *    the value is everything after the first assignment '=';
 *  - "name" (no '=' at all): makes sure the variable exists with an empty value;
 *  - a token whose only '=' characters belong to comparison operators or sit
 *    inside quotes: reports that comparisons are not implemented and leaves
 *    the scope untouched.
 *
 * Compound assignment operators (+=, -=, ...) are not interpreted; the
 * operator character stays part of the name.
 *
 * @param rootScope Scope whose `variables` map is updated.
 * @param token     Text of a single declaration or assignment.
 * @return true when the scope was updated, false otherwise.
 */
bool doAssignment(js_scope &rootScope, std::string token) {
    // FIXME: = inside JSON values is still not special-cased
    std::cout << "expression token[" << token << "]" << std::endl;
    std::cout << "expression debug" << std::endl;
    for (const std::string &part : splitExpressionParts(token)) {
        std::cout << "[" << part << "]" << std::endl;
    }
    std::cout << "expression end" << std::endl;

    bool compareReported = false;
    if (token.find("===") != std::string::npos) {
        std::cout << "JSParser:::doAssignment - strict compare not implemented" << std::endl;
        compareReported = true;
    } else if (token.find("==") != std::string::npos) {
        std::cout << "JSParser:::doAssignment - compare not implemented" << std::endl;
        compareReported = true;
    }

    const size_t assignAt = findAssignmentOperator(token);
    if (assignAt == std::string::npos) {
        if (token.find('=') != std::string::npos) {
            // only comparison operators (or '=' inside quotes): nothing to assign,
            // and the scope must not be modified by evaluating a comparison
            if (!compareReported) {
                std::cout << "JSParser:::doAssignment - compare not implemented" << std::endl;
            }
            return false;
        }
        // var bob; just make sure the variable exists
        auto name = trim(token);
        rootScope.variables[name] = "";
        return true;
    }

    if (assignAt + 1 >= token.length()) {
        // '=' is the last character: there is no value to store
        std::cout << "JSParser:::doAssignment - bad var parse " << token.substr(0, assignAt) << std::endl;
        return false;
    }

    std::string left = token.substr(0, assignAt);
    std::string right = token.substr(assignAt + 1);
    // FIXME: dot notation in keys
    auto key = trim(left);
    // FIXME: is value a lambda
    auto value = trim(right);
    rootScope.variables[key] = value;
    return true;
}
// extract scopes & scope.variables
// build execution tree
std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const {
std::shared_ptr<JavaScript> script = std::make_shared<JavaScript>();
script->tokens = this->getTokens(source);
// we need to at least build the root scope
//std::cout << "\nstart script" << std::endl;
for(auto it : script->tokens) {
//std::cout << "parse token[" << it << "]" << std::endl;
if (it.substr(0, 2)=="if") {
auto ifStr = it.substr(2);
// find (
auto end = ifStr.find('(');
ifStr = ifStr.substr(0, end);
// find )
end = ifStr.find(')');
auto ifCondition = ifStr.substr(0, end);
//std::cout << "ifCondition[" << ifCondition << "]" << std::endl;
ifStr = ifStr.substr(0, end);
// do we have a block start?
// find { (block start)
end = ifStr.find('{');
// if not block start
// else
end = ifStr.find('{');
std::cout << "if not implemented" << std::endl;
} else if (it.substr(0, 3)=="var") {
auto listStr = it.substr(3);
// FIXME: , in quotes or {} (JSON) <= top priority for 4chan
std::vector<std::string> opens, closes;
opens.push_back("{");
opens.push_back("'");
opens.push_back("\"");
closes.push_back("}");
closes.push_back("'");
closes.push_back("\"");
auto varList = parseSepButNotBetween(listStr, ",", opens, closes);
//std::cout << "has " << varList.size() << " variables" << std::endl;
for(auto it2 : varList) {
/*
// FIXME: make sure = isn't in quotes or JSON?
// FIXME: double or triple equal differentiation
//std::cout << "looking at [" << it2 << "]" << std::endl;
auto hasTripleEqual = it2.find("===");
auto hasDoubleEqual = std::string::npos;
auto hasSingleEqual = std::string::npos;
if (hasTripleEqual == std::string::npos) {
hasDoubleEqual = it2.find("==");
} else {
// process expression
std::cout << "var strict compare not implemented" << std::endl;
}
if (hasDoubleEqual == std::string::npos) {
hasSingleEqual = it2.find("=");
} else {
// process expression
std::cout << "var compare not implemented" << std::endl;
}
if (hasSingleEqual != std::string::npos) {
auto keyValue = split(it2, '=');
if (keyValue.size() < 2) {
std::cout << "bad var parse " << keyValue[0] << std::endl;
continue;
}
// FIXME: dot notation in keys
auto key = trim(keyValue[0]);
auto value = trim(keyValue[1]);
//std::cout << "[" << key << "=" << value <<s"]" << std::endl;
script->rootScope.variables[key] = value;
} else {
// var bob; just make sure the variable exists
script->rootScope.variables[it2] = "";
}
*/
doAssignment(script->rootScope, it2);
}
} else if (it.substr(0, 9)=="function ") {
auto defStr = it.substr(9);
// find ( (name end, prototype start)
auto end = defStr.find('(');
auto funcName = defStr.substr(0, end);
defStr = defStr.substr(end + 1); // next char after (
// find ) (prototype end)
end = defStr.find(')');
auto prototype = defStr.substr(0, end);
defStr = defStr.substr(end + 1); // next char after )
// find { (func start)
end = defStr.find('{');
defStr = defStr.substr(end + 1, defStr.size() - 2); // from { to the end
auto funcTokens = this->getTokens(defStr);
//std::cout << "function [" << funcName << "] prototype [" << prototype << "] has [" << funcTokens.size() << "] tokens" << std::endl;
// __netrunner_function_definition is 31 chars
script->rootScope.variables[funcName] = "__netrunner_function_definition = { prototype: \"" + prototype + "\", code: \"" + defStr + "\" }";
js_function newFunc;
newFunc.tokens = funcTokens;
newFunc.parent = &script->rootScope; // is this going to a problem?
} else if (it.substr(0, 6)=="return") {
// js expression here
// probably don't need to do anything here atm
std::cout << "return not implemented" << std::endl;
} else if (it.find("=") != std::string::npos) {
// has = so it's an expression
//std::cout << "assignment[" << it << "]" << std::endl;
//std::cout << "assignment not implemented" << std::endl;
doAssignment(script->rootScope, it);
} else if (it.find("(") != std::string::npos && it.find(")") != std::string::npos) {
// has () so it's a function call
//std::cout << "funcCall[" << it << "]" << std::endl;
// we need to start passed any && or ||
// need to parse any expression before the function call...
if (it.find("&&") == std::string::npos && it.find("||") == std::string::npos) {
// figure out function name
auto parenStart = it.find("(");
auto funcName = it.substr(0, parenStart);
//std::cout << "I think the function name is [" << funcName << "]" << std::endl;
if (script->rootScope.variables.find(funcName) == script->rootScope.variables.end()) {
std::cout << "Function [" << funcName << "] d.n.e" << std::endl;
} else {
// make sure function is parsed
// and step through tokens
auto arguments = it.substr(parenStart + 1, it.find(")") - parenStart - 1);
std::cout << "functionCall[" << funcName << "](" << arguments << ") not implemented" << std::endl;
//std::cout << "parameters[" << arguments << "]" << std::endl;
}
} else {
std::cout << "expression before functionCall not implemented" << std::endl;
}
//std::cout << "functionCall not implemented" << std::endl;
} else {
std::cout << "unknown_type[" << it << "]" << std::endl;
}
}
//std::cout << "end script" << "\n" << std::endl;
return script;
}
std::shared_ptr<JavaScript> JSParser::append(std::shared_ptr<JavaScript> &destination, const std::shared_ptr<JavaScript> &source) const {
for(auto it : source->tokens) {
destination->tokens.push_back(it);
}
// merge scopes (instead of reparsing)
for(auto it : source->rootScope.variables) {
destination->rootScope.variables[it.first] = it.second;
}
return destination;
}