From d9a9b9477304468b3767ee99c6105bfee83e8f8b Mon Sep 17 00:00:00 2001 From: Odilitime Date: Tue, 29 Aug 2017 17:04:43 -0700 Subject: [PATCH] notes and debug --- src/html/HTMLParser.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/html/HTMLParser.cpp b/src/html/HTMLParser.cpp index 2d1b7cd..d461039 100644 --- a/src/html/HTMLParser.cpp +++ b/src/html/HTMLParser.cpp @@ -98,11 +98,13 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { else if (html[cursor] == '<') { // HTML comments if (html[cursor + 1] == '!' && html[cursor + 2] == '-' && html[cursor + 3] == '-' ) { + //std::cout << "HTMLParser::Parse - starting HTML comment at " << cursor << std::endl; state = 4; } // close tag else if (html[cursor + 1] == '/') { // start closing tag + //std::cout << "HTMLParser::Parse - starting closing tag at " << html.substr(cursor, 7) << std::endl; if (currentNode && currentNode->parent) { // we should snap to the level we started at (as we maybe a couple levels deep // but it's the matching part of this tag @@ -111,7 +113,6 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { std::cout << "HTMLParser::Parse - currentNode/parent is null - close tag" << std::endl; } state = 1; // ignore closing tags - //starts.push_back(cursor); } // these have never have a closing tag else if ( @@ -123,6 +124,7 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { (html[cursor + 1] == 'm' && html[cursor + 2] == 'e' && html[cursor + 3] == 't' && html[cursor + 4] == 'a') || (html[cursor + 1] == 'i' && html[cursor + 2] == 'n' && html[cursor + 3] == 'p' && html[cursor + 4] == 'u' && html[cursor + 5] == 't') ) { + //std::cout << "HTMLParser::Parse - Starting single tag " << html.substr(cursor, 6) << std::endl; std::shared_ptr tagNode = std::make_shared(); if (currentNode) { currentNode->children.push_back(tagNode); @@ -138,7 +140,7 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { 
cursor ++; } else { std::string element = html.substr(cursor, closeTagPos + 2); - //std::cout << "creating element, tag: " << element << std::endl; + //std::cout << "HTMLParser::Parse - creating element, tag: " << element << std::endl; parseTag(element, *dynamic_cast(currentNode.get())); cursor += 2 + closeTagPos; } @@ -155,6 +157,7 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { } // start tag ( tagNode = std::make_shared(); if (currentNode) { currentNode->children.push_back(tagNode); @@ -168,6 +171,7 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { } } else { // start text node + //std::cout << "HTMLParser::Parse - start text node " << html.substr(cursor, 6) << std::endl; std::shared_ptr textNode = std::make_shared(); // not sure why currentNode is null but it is if (currentNode) { @@ -182,17 +186,16 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { } cursor--; } - else if (state == 1) { // Skip Over Element + else if (state == 1) { // Skip Over Element (used by closing tag) if (html[cursor] == '>') { - //std::string element = html.substr(starts.back(), cursor - starts.back() + 1); - //starts.pop_back(); - //std::cout << "HTMLParser::parse - close tag: " << element << std::endl; + //std::cout << "HTMLParser::parse - close tag: " << html.substr(starts.back(), cursor - starts.back() + 1) << std::endl; state = 0; prependWhiteSpace = false; } } else if (state == 4) { // HTML Comment if (html[cursor] == '-' && html[cursor + 1] == '-' && html[cursor + 2] == '>') { + //std::cout << "HTMLParser::Parse - Found end HTML comment at " << html.substr(cursor, 6) << std::endl; state = 0; cursor += 2; // advance cursor to end of comment prependWhiteSpace = false; @@ -201,7 +204,7 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { else if (state == 2) { // Search for end tag node if (html[cursor] == '>') { // end tag node std::string element = html.substr(starts.back(), cursor - starts.back() + 1); 
- //std::cout << "HTMLParser::parse - close tag: " << element << std::endl; + //std::cout << "HTMLParser::parse - end open tag: " << element << std::endl; if (element == "
  • ") { // this will close previous li before starting a new one autoCloseTag(currentNode, rootNode, "ul", "li"); @@ -221,6 +224,7 @@ std::shared_ptr HTMLParser::parse(const std::string &html) const { else if (state == 3) { // End text node if (html[cursor + 1] == '<') { dynamic_cast(currentNode.get())->text = (prependWhiteSpace?" ":"") + html.substr(starts.back(), cursor - starts.back() + 1); + //std::cout << "HTMLParser::parse - end text node: " << html.substr(starts.back(), cursor - starts.back() + 1) << std::endl; starts.pop_back(); if (currentNode && currentNode->parent) { currentNode = currentNode->parent; @@ -273,6 +277,8 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const { } else if (state == 3) { if (element[cursor] == '"') { + // was suggested to use tagNode.properties[propertyKey] = element.substr(start, cursor - start); + // for better readability tagNode.properties.insert(std::pair(propertyKey, element.substr(start, cursor - start))); start = cursor + 1; state = 1;