|
|
|
@ -249,23 +249,27 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
@@ -249,23 +249,27 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
|
|
|
|
|
std::string propertyKey; |
|
|
|
|
for (cursor = 0; cursor < element.length(); cursor++) { |
|
|
|
|
if (state == 0) { |
|
|
|
|
// space or end
|
|
|
|
|
if (element[cursor] == ' ' || element[cursor] == '>') { |
|
|
|
|
// set our tag (type / name, i.e. h1)
|
|
|
|
|
tagNode.tag = element.substr(start, cursor - start); |
|
|
|
|
// make sure our tag is lowercase
|
|
|
|
|
std::transform(tagNode.tag.begin(), tagNode.tag.end(), tagNode.tag.begin(), tolower); |
|
|
|
|
start = cursor + 1; |
|
|
|
|
state = 1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else if (state == 1) { |
|
|
|
|
else if (state == 1) { // attribute search
|
|
|
|
|
if (element[cursor] == ' ') { |
|
|
|
|
start = cursor + 1; |
|
|
|
|
} |
|
|
|
|
else if (element[cursor] == '=') { |
|
|
|
|
propertyKey = element.substr(start, cursor - start); |
|
|
|
|
start = cursor + 1; // start for non quotes
|
|
|
|
|
state = 2; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else if (state == 2) { |
|
|
|
|
else if (state == 2) { // after = of attribute
|
|
|
|
|
if (element[cursor] == '"') { |
|
|
|
|
start = cursor + 1; |
|
|
|
|
state = 3; |
|
|
|
@ -273,6 +277,11 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
@@ -273,6 +277,11 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
|
|
|
|
|
else if (element[cursor] == '\'') { |
|
|
|
|
start = cursor + 1; |
|
|
|
|
state = 4; |
|
|
|
|
} else if (element[cursor] == ' ') { |
|
|
|
|
// we just probably found an end of attribute without quotes
|
|
|
|
|
tagNode.properties.insert(std::pair<std::string, std::string>(propertyKey, element.substr(start, cursor - start))); |
|
|
|
|
start = cursor + 1; |
|
|
|
|
state = 1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else if (state == 3) { |
|
|
|
@ -292,5 +301,15 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
@@ -292,5 +301,15 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
// 2 is attribute without quotes, 3/4 is unclosed quote
|
|
|
|
|
if (state == 2 || state == 3 || state ==4) { |
|
|
|
|
// we were in an attr=
|
|
|
|
|
tagNode.properties.insert(std::pair<std::string, std::string>(propertyKey, element.substr(start, cursor - start - 1))); |
|
|
|
|
} else { |
|
|
|
|
if (state != 1) { |
|
|
|
|
// so what's ending on state 0 about? (something about no attributes and maybe no tag name/type)
|
|
|
|
|
std::cout << "HTMLParser::parseTag ending on state " << state << std::endl; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|