Browse Source

non-quoted attribute support

master
Odilitime 5 years ago
parent
commit
5e93788949
  1. 23
      src/html/HTMLParser.cpp

23
src/html/HTMLParser.cpp

@ -249,23 +249,27 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const { @@ -249,23 +249,27 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
std::string propertyKey;
for (cursor = 0; cursor < element.length(); cursor++) {
if (state == 0) {
// space or end
if (element[cursor] == ' ' || element[cursor] == '>') {
// set our tag (type / name, i.e. h1)
tagNode.tag = element.substr(start, cursor - start);
// make sure our tag is lowercase
std::transform(tagNode.tag.begin(), tagNode.tag.end(), tagNode.tag.begin(), tolower);
start = cursor + 1;
state = 1;
}
}
else if (state == 1) {
else if (state == 1) { // attribute search
if (element[cursor] == ' ') {
start = cursor + 1;
}
else if (element[cursor] == '=') {
propertyKey = element.substr(start, cursor - start);
start = cursor + 1; // start for non quotes
state = 2;
}
}
else if (state == 2) {
else if (state == 2) { // after = of attribute
if (element[cursor] == '"') {
start = cursor + 1;
state = 3;
@ -273,6 +277,11 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const { @@ -273,6 +277,11 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
else if (element[cursor] == '\'') {
start = cursor + 1;
state = 4;
} else if (element[cursor] == ' ') {
// we probably just found the end of an attribute without quotes
tagNode.properties.insert(std::pair<std::string, std::string>(propertyKey, element.substr(start, cursor - start)));
start = cursor + 1;
state = 1;
}
}
else if (state == 3) {
@ -292,5 +301,15 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const { @@ -292,5 +301,15 @@ void HTMLParser::parseTag(const std::string &element, TagNode &tagNode) const {
}
}
}
// 2 is attribute without quotes, 3/4 is unclosed quote
if (state == 2 || state == 3 || state ==4) {
// we were in an attr=
tagNode.properties.insert(std::pair<std::string, std::string>(propertyKey, element.substr(start, cursor - start - 1)));
} else {
if (state != 1) {
// so what's ending on state 0 about (something about no attributes and maybe no tag name/type)
std::cout << "HTMLParser::parseTag ending on state " << state << std::endl;
}
}
}

Loading…
Cancel
Save