Browse Source

more data structure (move scope to .h), getToken() refactor, doAssignment()

master
Odilitime 2 years ago
parent
commit
7c72f69025

+ 246
- 42
src/parsers/scripting/javascript/JSParser.cpp View File

@@ -1,32 +1,21 @@
#include "JSParser.h"
#include <vector>
#include <map>
#include <iostream>
#include "../../../StringUtils.h"

// One variable scope: a name -> value table plus a link to the enclosing scope.
class js_scope {
public:
    // Enclosing scope; null until one is assigned.
    js_scope *parent;
    // Variables of this scope keyed by name; values are stored as strings.
    std::map<std::string, std::string> variables;

    js_scope() : parent(nullptr) {}
};

std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const {
std::shared_ptr<JavaScript> ret = std::make_shared<JavaScript>();
std::shared_ptr<js_scope> global = std::make_shared<js_scope>();
std::cout << "source: " << source << "\n" << std::endl;
// tokenize it
unsigned int cursor;
int state = 0;
int last = 0;
int quoteStart = 0;
int scopeLevel = 0;
int jsonStart = 0;
int jsonLevel = 0;
int functionStart = 0;
// each token is one statement
std::vector<std::string> JSParser::getTokens(const std::string &source) const {
std::vector<std::string> tokens;
//std::cout << "source: " << source << "\n" << std::endl;
// tokenize it
size_t cursor;
unsigned char state = 0;
size_t last = 0;
size_t quoteStart = 0;
size_t scopeLevel = 0;
size_t jsonStart = 0;
size_t jsonLevel = 0;
size_t parenLevel = 0;
size_t parenStart = 0;
size_t functionStart = 0;
for (cursor = 0; cursor < source.length(); cursor++) {
if (state == 0) {
if (source[cursor] == '{') {
@@ -34,6 +23,10 @@ std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const {
jsonStart = cursor;
jsonLevel++;
//std::cout << "Entering JSON: " << cursor << std::endl;
} else if (source[cursor] == '(') {
state = 8; // in a function call or prototype
parenStart = cursor;
parenLevel++;
} else if (source[cursor] == '\'') { // quotes just for allowing [;{}\n] in quotes
quoteStart = cursor;
state = 4;
@@ -47,12 +40,12 @@ std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const {
// Multiline comment
state = 3;
} else if (source[cursor] == 'v' && source.length() > cursor + 3 && source[cursor + 1] == 'a'
&& source[cursor + 2] == 'r' && source[cursor + 3] == ' ') {
&& source[cursor + 2] == 'r' && source[cursor + 3] == ' ') {
// var
state = 7;
} else if (source[cursor] == 'f' && source.length() > cursor + 8 && source[cursor + 1] == 'u'
&& source[cursor + 2] == 'n' && source[cursor + 3] == 'c' && source[cursor + 4] == 't'
&& source[cursor + 5] == 'i' && source[cursor + 6] == 'o' && source[cursor + 7] == 'n') {
&& source[cursor + 2] == 'n' && source[cursor + 3] == 'c' && source[cursor + 4] == 't'
&& source[cursor + 5] == 'i' && source[cursor + 6] == 'o' && source[cursor + 7] == 'n') {
//std::cout << "Entering function: " << cursor << std::endl;
state = 6;
functionStart = cursor;
@@ -61,23 +54,36 @@ std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const {
// inside a scope (JSON)
if (source[cursor] == '{') {
jsonLevel++;
} else
if (source[cursor] == '}') {
} else if (source[cursor] == '}') {
jsonLevel--;
if (!jsonLevel) {
std::cout << "Exiting JSON: " << source.substr(jsonStart, cursor - jsonStart) << std::endl;
//std::cout << "Exiting JSON: " << source.substr(jsonStart, cursor - jsonStart) << "\n" << std::endl;
state = 0; // exit JSON
}
}
} else if (state == 2) {
} else if (state == 8) {
// inside a paren (function)
//std::cout << "looking at [" << source[cursor] << "]@" << cursor << std::endl;
if (source[cursor] == '(') {
parenLevel++;
} else if (source[cursor] == ')') {
parenLevel--;
if (!parenLevel) {
//std::cout << "Exiting Paren: " << source.substr(parenStart, cursor - parenStart) << "\n" << std::endl;
state = 0; // exit JSON
}
}
} else if (state == 2) {
// inside a single line comment
if (source[cursor] == '\n') {
last = cursor;
state = 0;
}
} else if (state == 3) {
// inside a multiline comment
if (source[cursor] == '*' && source.length() > cursor + 1 && source[cursor + 1] == '/') {
// end multiline comment
last = cursor;
state = 0;
}
} else if (state == 4) {
@@ -109,34 +115,232 @@ std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const {
if (source[cursor] == '}') {
scopeLevel--;
if (state == 6 && !scopeLevel) {
std::cout << "Exiting function: " << source.substr(functionStart, cursor - functionStart) << std::endl;
//std::cout << "Exiting function: " << source.substr(functionStart, cursor - functionStart) << "\n" << std::endl;
state = 0;
endIt = true;
}
}
// state 0 or 7, ignore states 1-6
if ((state == 0 || state == 7) && !scopeLevel) {
if (source[cursor] == '\n' || source[cursor] == ';' || endIt) {
if (source[cursor] == '\n' || source[cursor] == ';' || endIt || (source[cursor] == ',' && state != 7)) {
// FIXME: ; in for loops
std::string token = source.substr(last ? last + 1 : last, last ? (cursor - last - 1) : cursor );
if (source[cursor] == '}') {
token += '}';
}
// scopeLevel[" << scopeLevel << "]"
std::cout << "got token [" << token << "] ending[" << source[cursor] << "] endIt[" << endIt << "]" << std::endl;
tokens.push_back(token);
//std::cout << "got token [" << token << "] ending[" << source[cursor] << "] endIt[" << endIt << "]" << std::endl;
if (token.length()<3) {
//std::cout << "token too short [" << token << "]" << std::endl;
} else {
tokens.push_back(token);
}
last = cursor;
if (state == 7) { // allow var constructs to end normally and take us out of var construct
state = 0; // reset state
state = 0; // reset state
}
}
}
}
std::cout << "out of characters in state " << state << std::endl;
std::string token = source.substr(last ? last + 1 : last, last ? (cursor - last - 1) : cursor );
tokens.push_back(token);
std::cout << "got token [" << token << "] ending[" << source[cursor] << "]" << std::endl;
return ret;
//&& !token.length() // all look like complete valid tokens
if (!state ) {
return tokens;
}
//std::cout << "out of characters in state " << std::to_string(state) << " token[" << token << "]" << std::endl;
//std::cout << "got token [" << token << "] ending[" << source[cursor] << "]" << std::endl;
if (token.length()<3) {
//std::cout << "token too short [" << token << "]" << std::endl;
} else {
tokens.push_back(token);
}
return tokens;
}

// Applies one assignment or bare declaration to `rootScope`.
//
// `token` is a single statement fragment such as `a = 1` or `bob`:
//  - `name = expr`: stores the trimmed `expr` text under the trimmed `name`.
//    Only the first `=` separates the two, so the value may itself contain
//    `=` (e.g. `x = a == b` stores `a == b` under `x`).
//  - no `=` at all: declares the trimmed name with an empty value, leaving an
//    already existing variable untouched.
//  - first `=` begins `==` or `===`: the fragment is a comparison, which is
//    not implemented. It is reported and the scope is left unchanged.
//
// Returns true when the fragment was accepted as an assignment/declaration,
// false when it was rejected.
//
// FIXME: make sure = isn't in quotes or JSON?
// FIXME: compound/relational operators (+=, !=, <=, >=) are not recognised
bool doAssignment(js_scope &rootScope, std::string token) {
    const auto equalPos = token.find('=');
    if (equalPos == std::string::npos) {
        // var bob; just make sure the variable exists (emplace never overwrites)
        rootScope.variables.emplace(trim(token), "");
        return true;
    }

    // An '=' immediately followed by another '=' is a comparison operator,
    // not an assignment; do not touch the scope for those.
    const bool isComparison = equalPos + 1 < token.size() && token[equalPos + 1] == '=';
    if (isComparison) {
        const bool isStrict = equalPos + 2 < token.size() && token[equalPos + 2] == '=';
        if (isStrict) {
            // process === expression
            std::cout << "JSParser:::doAssignment - strict compare not implemented" << std::endl;
        } else {
            // process == expression
            std::cout << "JSParser:::doAssignment - compare not implemented" << std::endl;
        }
        return false;
    }

    // Split on the first '=' only so the right-hand side is kept whole.
    // Named locals (not temporaries) are handed to trim(), as the original did.
    std::string rawKey = token.substr(0, equalPos);
    std::string rawValue = token.substr(equalPos + 1);
    // FIXME: dot notation in keys
    auto key = trim(rawKey);
    if (key.empty()) {
        std::cout << "JSParser:::doAssignment - bad var parse " << token << std::endl;
        return false;
    }
    // FIXME: is value a lambda
    auto value = trim(rawValue);
    //std::cout << "[" << key << "=" << value << "]" << std::endl;
    rootScope.variables[key] = value;
    return true;
}

// extract scopes & scope.variables
// build execution tree
// Tokenizes `source` into statements and builds the script's root scope.
//
// Each statement token is classified by its leading keyword or shape:
//  - `if ...`        : not implemented, only reported
//  - `var a, b = 1`  : every declaration is applied through doAssignment()
//  - `function f(){}`: stored in rootScope.variables under the function name
//  - `return ...`    : not implemented, only reported
//  - contains `=`    : treated as an assignment via doAssignment()
//  - contains `(`+`)`: treated as a function call (lookup only, not executed)
// Anything else is reported as unknown.
//
// Returns the populated script; its `tokens` hold the statement list.
std::shared_ptr<JavaScript> JSParser::parse(const std::string &source) const {
    // True when `text` starts with `keyword` and the keyword is not merely the
    // prefix of a longer identifier (so `variable = 1` is not taken for `var`).
    const auto startsWithKeyword = [](const std::string &text, const std::string &keyword) -> bool {
        if (text.compare(0, keyword.size(), keyword) != 0) {
            return false;
        }
        if (text.size() == keyword.size()) {
            return true;
        }
        const char next = text[keyword.size()];
        const bool identifierChar = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z')
            || (next >= '0' && next <= '9') || next == '_' || next == '$';
        return !identifierChar;
    };

    std::shared_ptr<JavaScript> script = std::make_shared<JavaScript>();
    script->tokens = this->getTokens(source);
    // we need to at least build the root scope
    //std::cout << "\nstart script" << std::endl;
    for (const auto &token : script->tokens) {
        //std::cout << "parse token[" << token << "]" << std::endl;
        if (startsWithKeyword(token, "if")) {
            // TODO: extract the condition between ( and ) and the optional { block }
            std::cout << "if not implemented" << std::endl;
        } else if (startsWithKeyword(token, "var")) {
            auto listStr = token.substr(3);
            // Commas inside JSON braces or quotes must not split the declaration list.
            std::vector<std::string> opens = {"{", "'", "\""};
            std::vector<std::string> closes = {"}", "'", "\""};
            auto varList = parseSepButNotBetween(listStr, ",", opens, closes);
            //std::cout << "has " << varList.size() << " variables" << std::endl;
            for (const auto &declaration : varList) {
                doAssignment(script->rootScope, declaration);
            }
        } else if (token.compare(0, 9, "function ") == 0) {
            auto defStr = token.substr(9);
            // find ( (name end, prototype start)
            const auto nameEnd = defStr.find('(');
            auto funcName = defStr.substr(0, nameEnd);
            if (nameEnd != std::string::npos) {
                defStr = defStr.substr(nameEnd + 1); // next char after (
            }
            // find ) (prototype end)
            const auto prototypeEnd = defStr.find(')');
            auto prototype = defStr.substr(0, prototypeEnd);
            if (prototypeEnd != std::string::npos) {
                defStr = defStr.substr(prototypeEnd + 1); // next char after )
            }
            // Keep only the text between the opening { and the final }.
            // substr() takes (position, count), so the count is derived from
            // both brace positions instead of from the string length.
            const auto bodyOpen = defStr.find('{');
            const auto bodyBegin = (bodyOpen == std::string::npos) ? 0 : bodyOpen + 1;
            auto bodyEnd = defStr.rfind('}');
            if (bodyEnd == std::string::npos || bodyEnd < bodyBegin) {
                bodyEnd = defStr.size(); // no closing brace: take everything that is left
            }
            defStr = defStr.substr(bodyBegin, bodyEnd - bodyBegin);
            auto funcTokens = this->getTokens(defStr);
            //std::cout << "function [" << funcName << "] prototype [" << prototype << "] has [" << funcTokens.size() << "] tokens" << std::endl;
            // __netrunner_function_definition is 31 chars
            script->rootScope.variables[funcName] = "__netrunner_function_definition = { prototype: \"" + prototype + "\", code: \"" + defStr + "\" }";
            // NOTE(review): newFunc is built but not stored anywhere yet, so it
            // is discarded at the end of this branch.
            js_function newFunc;
            newFunc.tokens = funcTokens;
            newFunc.parent = &script->rootScope; // is this going to a problem?
        } else if (startsWithKeyword(token, "return")) {
            // js expression here
            // probably don't need to do anything here atm
            std::cout << "return not implemented" << std::endl;
        } else if (token.find('=') != std::string::npos) {
            // has = so it's an expression
            //std::cout << "assignment[" << token << "]" << std::endl;
            doAssignment(script->rootScope, token);
        } else if (token.find('(') != std::string::npos && token.find(')') != std::string::npos) {
            // has () so it's a function call
            //std::cout << "funcCall[" << token << "]" << std::endl;
            // we need to start passed any && or ||
            // need to parse any expression before the function call...
            if (token.find("&&") == std::string::npos && token.find("||") == std::string::npos) {
                // figure out function name
                const auto parenStart = token.find('(');
                auto funcName = token.substr(0, parenStart);
                //std::cout << "I think the function name is [" << funcName << "]" << std::endl;
                if (script->rootScope.variables.find(funcName) == script->rootScope.variables.end()) {
                    std::cout << "Function [" << funcName << "] d.n.e" << std::endl;
                } else {
                    // make sure function is parsed
                    // and step through tokens
                    std::cout << "functionCall[" << funcName << "] not implemented" << std::endl;
                    // Arguments are the text between ( and the first ) after it.
                    const auto parenEnd = token.find(')', parenStart);
                    const auto arguments = (parenEnd == std::string::npos)
                        ? token.substr(parenStart + 1)
                        : token.substr(parenStart + 1, parenEnd - parenStart - 1);
                    std::cout << "parameters[" << arguments << "]" << std::endl;
                }
            } else {
                std::cout << "expression before functionCall not implemented" << std::endl;
            }
        } else {
            std::cout << "unknown_type[" << token << "]" << std::endl;
        }
    }
    //std::cout << "end script" << "\n" << std::endl;
    return script;
}

// Merges `source` into `destination` without reparsing.
//
// All of source's tokens are appended to destination's token list, and every
// root-scope variable of source is copied over; a variable that exists in
// both takes source's value. Returns `destination`.
//
// If either pointer is null nothing is merged and `destination` is returned
// as-is. Appending a script to itself duplicates its tokens.
std::shared_ptr<JavaScript> JSParser::append(std::shared_ptr<JavaScript> &destination, const std::shared_ptr<JavaScript> &source) const {
    if (!destination || !source) {
        return destination;
    }
    if (destination == source) {
        // Inserting a vector's own range into itself is not allowed, so copy
        // the tokens first. Merging the variables into themselves is a no-op.
        const std::vector<std::string> snapshot(source->tokens);
        destination->tokens.insert(destination->tokens.end(), snapshot.begin(), snapshot.end());
        return destination;
    }
    destination->tokens.insert(destination->tokens.end(), source->tokens.begin(), source->tokens.end());
    // merge scopes (instead of reparsing)
    for (const auto &variable : source->rootScope.variables) {
        destination->rootScope.variables[variable.first] = variable.second;
    }
    return destination;
}

+ 72
- 1
src/parsers/scripting/javascript/JSParser.h View File

@@ -2,14 +2,85 @@
#define JSPARSER_H

#include <map>
#include <memory>
#include <string>
#include <vector>

class JavaScript {
// Internal JS types: String, Number, Function, Array, Object
// blocks/scopes?
// Common base class of the JS value representations declared in this header
// (js_string, js_number, js_function, js_array, js_object).
class js_internal_storage {
public:
    js_internal_storage() = default;
    js_internal_storage(const js_internal_storage &) = default;
    js_internal_storage &operator=(const js_internal_storage &) = default;
    js_internal_storage(js_internal_storage &&) = default;
    js_internal_storage &operator=(js_internal_storage &&) = default;
    // Virtual so a derived value can be destroyed safely through a pointer
    // or reference to this base.
    virtual ~js_internal_storage() = default;

    // Planned conversions (not implemented yet):
    // toString
    // toNumber
    // toFunction
    // toArray
    // toObject
};

// One variable scope: string variables, typed values, and links to the
// enclosing and nested scopes.
class js_scope {
public:
    // Enclosing scope; null until one is assigned.
    js_scope *parent;
    // Nested scopes. Open question: what are children needed for, given that
    // a call stack only includes its parents (in JS?)
    std::vector<js_scope> children;
    // Variables of this scope keyed by name; values are stored as strings.
    std::map<std::string, std::string> variables;
    // Typed values keyed by name.
    // NOTE(review): held by value as the base type, so a derived value put
    // here would be sliced - confirm whether pointer storage is intended.
    std::map<std::string, js_internal_storage> data;
    // Open question: an instruction pointer is probably needed, especially
    // for loops, but how would tokens be addressed - by index?

    js_scope() : parent(nullptr) {}
};

// JS string value.
class js_string : public js_internal_storage {
public:
// The string's text.
std::string value;
};

// JS number value, held as a signed integer.
// NOTE(review): an integer cannot represent fractional JS numbers - confirm
// whether that is intended.
class js_number : public js_internal_storage {
public:
    // Numeric value; starts at 0 so a fresh js_number is never indeterminate.
    signed long value = 0;
};

// JS function value: the statement tokens of its body plus its scopes.
class js_function : public js_internal_storage {
public:
    // Statement tokens of the function body.
    std::vector<std::string> tokens;
    // Scope the function was defined in (usually global); null until assigned
    // so an unset parent is detectable instead of indeterminate.
    js_scope *parent = nullptr;
    // The function's own local scope.
    js_scope local;
};

// JS array value: an ordered list of elements.
class js_array : public js_internal_storage {
public:
// Elements in order.
// NOTE(review): elements are held by value as the base type, so a derived
// value stored here would be sliced - confirm whether pointer storage is intended.
std::vector<js_internal_storage> value;
};

// JS object value: properties keyed by name.
class js_object : public js_internal_storage {
public:
// Property name -> property value.
// NOTE(review): values are held by value as the base type, so a derived
// value stored here would be sliced - confirm whether pointer storage is intended.
std::map<std::string, js_internal_storage> value;
};


// Parsed form of one script: its statement tokens and the root scope that
// holds the variables collected from them.
class JavaScript {
public:
    // Resets the script to an empty state: every token/statement list is
    // emptied and the root scope loses its parent, children, variables and data.
    void clear() {
        tokens.clear();
        definitions.clear();
        instructions.clear();
        rootScope.parent = nullptr;
        rootScope.children.clear();
        rootScope.variables.clear();
        rootScope.data.clear();
    }
    // each token is one statement
    std::vector<std::string> tokens;
    // we're just setting the rootScope.variables
    std::vector<std::string> definitions; // all var declarations and their expressions
    std::vector<std::string> instructions; // then a list of all remaining expressions and function calls
    // Top-level scope of the script.
    js_scope rootScope;
};

// Turns JavaScript source text into a JavaScript object (statement tokens
// plus a root scope of variables).
class JSParser {
public:
// Tokenizes `javascript` with getTokens() and builds the root scope from the
// resulting statements. Returns the new script.
std::shared_ptr<JavaScript> parse(const std::string &javascript) const;
// Splits `source` into tokens, where each token is one statement.
std::vector<std::string> getTokens(const std::string &source) const;
// Appends `source`'s tokens to `destination` and copies `source`'s root-scope
// variables into it (instead of reparsing). Returns `destination`.
std::shared_ptr<JavaScript> append(std::shared_ptr<JavaScript> &destination, const std::shared_ptr<JavaScript> &source) const;
};

#endif

Loading…
Cancel
Save