Browse Source

NTRML parser

Odilitime 5 years ago
parent
commit
f047ef6b56
  1. 169
      src/parsers/ntrml/NTRMLParser.cpp
  2. 12
      src/parsers/ntrml/NTRMLParser.h

169
src/parsers/ntrml/NTRMLParser.cpp

@ -0,0 +1,169 @@ @@ -0,0 +1,169 @@
#include "NTRMLParser.h"
#include "TextNode.h"
#include <algorithm>
#include <iostream>
#include <memory>
std::shared_ptr<Node> NTRMLParser::parse(const std::string &ntrml) const {
std::shared_ptr<Node> rootNode = std::make_shared<Node>(NodeType::ROOT);
std::shared_ptr<Node> currentNode = rootNode;
std::shared_ptr<Node> startTagLevel = rootNode;
std::vector<unsigned int> starts;
unsigned int cursor;
int state = 0;
int prependWhiteSpace = false;
for (cursor = 0; cursor < ntrml.length(); cursor++) { // TODO handle trying to look ahead past string
if (state == 0) { // Outside tag
if (ntrml[cursor] == ' ' || ntrml[cursor] == '\t' || ntrml[cursor] == '\r' || ntrml[cursor] == '\n') {
prependWhiteSpace = true;
continue;
}
else if (ntrml[cursor] == '<') {
// HTML style comments
if (ntrml[cursor + 1] == '!' && ntrml[cursor + 2] == '-' && ntrml[cursor + 3] == '-' ) {
//std::cout << "NTRMLParser::Parse - starting HTML comment at " << cursor << std::endl;
cursor += 3; // advance cursor to end of comment start
state = 4;
}
// close tag
else if (ntrml[cursor + 1] == '/') {
// start closing tag
//std::cout << "NTRMLParser::Parse - starting closing tag at " << ntrml.substr(cursor, 7) << std::endl;
if (currentNode && currentNode->parent) {
// we should snap to the level we started at (as we maybe a couple levels deep <ul><li></ul>
// but it's the matching part of this tag
currentNode = currentNode->parent;
} else {
std::cout << "NTRMLParser::Parse - currentNode/parent is null - close tag" << std::endl;
}
state = 1; // ignore closing tags
}
// these have never have a closing tag
else if (
/*(ntrml[cursor + 1] == 'h' && ntrml[cursor + 2] == 'r') ||
(ntrml[cursor + 1] == 'b' && ntrml[cursor + 2] == 'r') ||
(ntrml[cu rsor + 1] == 'w' && ntrml[cursor + 2] == 'b' && ntrml[cursor + 3] == 'r') || */
(ntrml[cursor + 1] == 'i' && ntrml[cursor + 2] == 'm' && ntrml[cursor + 3] == 'g') ||
/* (ntrml[cursor + 1] == 'l' && ntrml[cursor + 2] == 'i' && ntrml[cursor + 3] == 'n' && ntrml[cursor + 4] == 'k') ||
(ntrml[cursor + 1] == 'm' && ntrml[cursor + 2] == 'e' && ntrml[cursor + 3] == 't' && ntrml[cursor + 4] == 'a') || */
(ntrml[cursor + 1] == 'i' && ntrml[cursor + 2] == 'n' && ntrml[cursor + 3] == 'p' && ntrml[cursor + 4] == 'u' && ntrml[cursor + 5] == 't')
) {
//std::cout << "NTRMLParser::Parse - Starting single tag " << ntrml.substr(cursor, 6) << std::endl;
std::shared_ptr<TagNode> tagNode = std::make_shared<TagNode>();
if (currentNode) {
currentNode->children.push_back(tagNode);
tagNode->parent = currentNode;
} else {
std::cout << "NTRMLParser::Parse - currentNode is null - tagNode" << std::endl;
}
currentNode = tagNode;
size_t closeTagPos = ntrml.substr(cursor + 1).find(">");
//std::cout << "found closeTagPos at " << closeTagPos << std::endl;
if (closeTagPos == std::string::npos) {
std::cout << "NTRMLParser::Parse - can't find closing tag for single tag" << std::endl;
cursor ++;
} else {
std::string element = ntrml.substr(cursor, closeTagPos + 2);
//std::cout << "NTRMLParser::Parse - creating element, tag: " << element << std::endl;
parseTag(element, *dynamic_cast<TagNode*>(currentNode.get()));
cursor += 2 + closeTagPos;
}
// drop back
if (currentNode && currentNode->parent) {
currentNode = currentNode->parent;
} else {
std::cout << "NTRMLParser::Parse - currentNode/parent is null - textNode state3" << std::endl;
}
prependWhiteSpace = false;
state = 0;
}
// start tag (<bob> <bob part)
else {
//std::cout << "NTRMLParser::Parse - start oc tag " << ntrml.substr(cursor, 6) << std::endl;
std::shared_ptr<TagNode> tagNode = std::make_shared<TagNode>();
if (currentNode) {
currentNode->children.push_back(tagNode);
tagNode->parent = currentNode;
} else {
std::cout << "NTRMLParser::Parse - currentNode is null - tagNode" << std::endl;
}
currentNode = tagNode;
starts.push_back(cursor);
state = 2;
}
}
else { // start text node
//std::cout << "NTRMLParser::Parse - start text node " << ntrml.substr(cursor, 6) << std::endl;
std::shared_ptr<TextNode> textNode = std::make_shared<TextNode>();
// not sure why currentNode is null but it is
if (currentNode) {
currentNode->children.push_back(textNode);
textNode->parent = currentNode;
} else {
std::cout << "NTRMLParser::Parse - currentNode is null - textNode" << std::endl;
}
currentNode = textNode;
starts.push_back(cursor);
state = 3;
}
cursor--;
}
else if (state == 1) { // Skip Over Element (used by closing tag)
if (ntrml[cursor] == '>') {
//std::cout << "NTRMLParser::parse - close tag: " << ntrml.substr(starts.back(), cursor - starts.back() + 1) << std::endl;
state = 0;
prependWhiteSpace = false;
}
}
else if (state == 4) { // HTML style Comment
if (ntrml[cursor] == '-' && ntrml[cursor + 1] == '-' && ntrml[cursor + 2] == '>') {
//std::cout << "NTRMLParser::Parse - Found end NTRML comment at " << ntrml.substr(cursor, 6) << std::endl;
state = 0;
cursor += 2; // advance cursor to end of comment
prependWhiteSpace = false;
}
}
else if (state == 2) { // Search for end tag node
if (ntrml[cursor] == '>') { // end tag node
std::string element = ntrml.substr(starts.back(), cursor - starts.back() + 1);
//std::cout << "NTRMLParser::parse - end open tag: " << element << std::endl;
/*
if (element == "<li>") {
// this will close previous li before starting a new one
autoCloseTag(currentNode, rootNode, "ul", "li");
//std::cout << "scanned parents for unclosed lis" << std::endl;
} else
if (element == "<option>") { // FIXME: options have attributes
// this will close previous option before starting a new one
autoCloseTag(currentNode, rootNode, "select", "option");
//std::cout << "scanned parents for unclosed options" << std::endl;
}
*/
starts.pop_back();
parseTag(element, *dynamic_cast<TagNode*>(currentNode.get()));
state = 0;
prependWhiteSpace = false;
}
}
else if (state == 3) { // End text node
if (ntrml[cursor + 1] == '<') {
dynamic_cast<TextNode*>(currentNode.get())->text = (prependWhiteSpace?" ":"") + ntrml.substr(starts.back(), cursor - starts.back() + 1);
//std::cout << "NTRMLParser::parse - end text node: " << ntrml.substr(starts.back(), cursor - starts.back() + 1) << std::endl;
starts.pop_back();
if (currentNode && currentNode->parent) {
currentNode = currentNode->parent;
} else {
std::cout << "NTRMLParser::Parse - currentNode/parent is null - textNode state3" << std::endl;
}
state = 0;
prependWhiteSpace = false;
}
}
}
//printNode(rootNode, 0);
return rootNode;
}

12
src/parsers/ntrml/NTRMLParser.h

@ -0,0 +1,12 @@ @@ -0,0 +1,12 @@
#ifndef NTRMLPARSER_H
#define NTRMLPARSER_H
#include "HTMLParser.h"
// our parseTag function is similar enough
// Parser for NTRML markup. It derives from HTMLParser so it can use the base
// class's parseTag(); the only member declared here is parse().
class NTRMLParser : public HTMLParser {
public:
// Builds a node tree from the markup in `ntrml` and returns the tree's root
// node. The input string is not modified.
// NOTE(review): whether this overrides or merely hides HTMLParser::parse is
// not visible from this header - confirm before adding `override`.
std::shared_ptr<Node> parse(const std::string &ntrml) const;
};
#endif
Loading…
Cancel
Save