Browse Source

notes and debug

master
Odilitime 4 years ago
parent
commit
d9a9b94773
1 changed files with 13 additions and 7 deletions
  1. 13
    7
      src/html/HTMLParser.cpp

+ 13
- 7
src/html/HTMLParser.cpp View File

@@ -98,11 +98,13 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
else if (html[cursor] == '<') {
// HTML comments
if (html[cursor + 1] == '!' && html[cursor + 2] == '-' && html[cursor + 3] == '-' ) {
//std::cout << "HTMLParser::Parse - starting HTML comment at " << cursor << std::endl;
state = 4;
}
// close tag
else if (html[cursor + 1] == '/') {
// start closing tag
//std::cout << "HTMLParser::Parse - starting closing tag at " << html.substr(cursor, 7) << std::endl;
if (currentNode && currentNode->parent) {
// we should snap to the level we started at (as we may be a couple of levels deep, e.g. <ul><li></ul>,
// but it's the matching part of this tag)
@@ -111,7 +113,6 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
std::cout << "HTMLParser::Parse - currentNode/parent is null - close tag" << std::endl;
}
state = 1; // ignore closing tags
//starts.push_back(cursor);
}
// these never have a closing tag
else if (
@@ -123,6 +124,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
(html[cursor + 1] == 'm' && html[cursor + 2] == 'e' && html[cursor + 3] == 't' && html[cursor + 4] == 'a') ||
(html[cursor + 1] == 'i' && html[cursor + 2] == 'n' && html[cursor + 3] == 'p' && html[cursor + 4] == 'u' && html[cursor + 5] == 't')
) {
//std::cout << "HTMLParser::Parse - Starting single tag " << html.substr(cursor, 6) << std::endl;
std::shared_ptr<TagNode> tagNode = std::make_shared<TagNode>();
if (currentNode) {
currentNode->children.push_back(tagNode);
@@ -138,7 +140,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
cursor ++;
} else {
std::string element = html.substr(cursor, closeTagPos + 2);
//std::cout << "creating element, tag: " << element << std::endl;
//std::cout << "HTMLParser::Parse - creating element, tag: " << element << std::endl;
parseTag(element, *dynamic_cast<TagNode*>(currentNode.get()));
cursor += 2 + closeTagPos;
}
@@ -155,6 +157,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
}
// start tag (<bob> <bob part)
else {
//std::cout << "HTMLParser::Parse - start oc tag " << html.substr(cursor, 6) << std::endl;
std::shared_ptr<TagNode> tagNode = std::make_shared<TagNode>();
if (currentNode) {
currentNode->children.push_back(tagNode);
@@ -168,6 +171,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
}
}
else { // start text node
//std::cout << "HTMLParser::Parse - start text node " << html.substr(cursor, 6) << std::endl;
std::shared_ptr<TextNode> textNode = std::make_shared<TextNode>();
// not sure why currentNode is null but it is
if (currentNode) {
@@ -182,17 +186,16 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
}
cursor--;
}
else if (state == 1) { // Skip Over Element
else if (state == 1) { // Skip Over Element (used by closing tag)
if (html[cursor] == '>') {
//std::string element = html.substr(starts.back(), cursor - starts.back() + 1);
//starts.pop_back();
//std::cout << "HTMLParser::parse - close tag: " << element << std::endl;
//std::cout << "HTMLParser::parse - close tag: " << html.substr(starts.back(), cursor - starts.back() + 1) << std::endl;
state = 0;
prependWhiteSpace = false;
}
}
else if (state == 4) { // HTML Comment
if (html[cursor] == '-' && html[cursor + 1] == '-' && html[cursor + 2] == '>') {
//std::cout << "HTMLParser::Parse - Found end HTML comment at " << html.substr(cursor, 6) << std::endl;
state = 0;
cursor += 2; // advance cursor to end of comment
prependWhiteSpace = false;
@@ -201,7 +204,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
else if (state == 2) { // Search for end tag node
if (html[cursor] == '>') { // end tag node
std::string element = html.substr(starts.back(), cursor - starts.back() + 1);
//std::cout << "HTMLParser::parse - close tag: " << element << std::endl;
//std::cout << "HTMLParser::parse - end open tag: " << element << std::endl;
if (element == "<li>") {
// this will close previous li before starting a new one
autoCloseTag(currentNode, rootNode, "ul", "li");
@@ -221,6 +224,7 @@ std::shared_ptr<Node> HTMLParser::parse(const std::string &html) const {
else if (state == 3) { // End text node
if (html[cursor + 1] == '<') {
dynamic_cast<TextNode*>(currentNode.get())->text = (prependWhiteSpace?" ":"") + html.substr(starts.back(), cursor - starts.back() + 1);
//std::cout << "HTMLParser::parse - end text node: " << html.substr(starts.back(), cursor - starts.back() + 1) << std::endl;
starts.pop_back();
if (currentNode && currentNode->parent) {
currentNode = currentNode->parent;
@@ -273,6 +277,8 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
}
else if (state == 3) {
if (element[cursor] == '"') {
// it was suggested to use tagNode.properties[propertyKey] = element.substr(start, cursor - start);
// for better readability
tagNode.properties.insert(std::pair<std::string, std::string>(propertyKey, element.substr(start, cursor - start)));
start = cursor + 1;
state = 1;

Loading…
Cancel
Save