Browse Source

setPort() refactor, merge() support for "relative"-scheme (//host) URLs, parseUri fix for //host parsing

master
Odilitime 5 years ago
parent
commit
b9781fffcd
  1. 49
      src/URL.cpp
  2. 1
      src/URL.h

49
src/URL.cpp

@@ -55,6 +55,8 @@ std::tuple<std::unique_ptr<URL>,enum URIParseError> parseUri(std::string raw) {
// there is a host
uri->scheme = "relative";
state = AUTHORITY;
cursor += 2; // skip the //
last = 2;
} else {
// relative path
uri->scheme = ""; // probably doesn't matter if it's "" or "relative"
@@ -68,21 +70,14 @@ std::tuple<std::unique_ptr<URL>,enum URIParseError> parseUri(std::string raw) {
return std::make_tuple(std::move(uri), URI_PARSE_ERROR_SCHEME);
}
}
for (cursor = 1; cursor < raw.length(); cursor++) {
//std::cout << "URL::parseUri - starting at " << cursor << " [" << raw[cursor] << "]" << std::endl;
for (; cursor < raw.length(); cursor++) {
/* TODO
* Allow scheme-less uri (and fallback to https/http) */
if (state == SCHEME) {
if (raw[cursor] == ':') {
uri->scheme = toLowercase(raw.substr(0, cursor));
/* TODO
* Put default port now (Should use a table for that but
* I don't know C++ enough) */
if (uri->scheme == "http") {
uri->port = 80;
}
if (uri->scheme == "https"){
uri->port = 443;
}
uri->setPort();
state = FIRST_SLASH;
} else if (!isalpha(raw[cursor]) && !isdigit(raw[cursor]) && raw[cursor] != '+' &&
raw[cursor] != '-' && raw[cursor] != '.' && raw[cursor] != '/') { // why this exception list?
@@ -122,8 +117,8 @@ std::tuple<std::unique_ptr<URL>,enum URIParseError> parseUri(std::string raw) {
// Authority is finished, everything should be considered as the host[port].
// TODO terminated by the next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end of the URI.
// What to do when ? and # ?
} else if (raw[cursor] == '/') {
if (lastSemicolon > 0) {
} else if (raw[cursor] == '/') { // end if we run into /
if (lastSemicolon > 0) { // FIXME: it's a colon not semicolon
// TODO Validate port
if (cursor - lastSemicolon - 1 > 0) {
uri->port = std::stoi(raw.substr(lastSemicolon+1, cursor - lastSemicolon+1));
@@ -143,7 +138,7 @@ std::tuple<std::unique_ptr<URL>,enum URIParseError> parseUri(std::string raw) {
} else {
state = FRAGMENT;
}
} else if (cursor + 1 == raw.length()) {
} else if (cursor + 1 == raw.length()) { // if at end of input
uri->host = raw.substr(last, lastSemicolon - last);
uri->path = "/";
break;
@@ -158,7 +153,7 @@ std::tuple<std::unique_ptr<URL>,enum URIParseError> parseUri(std::string raw) {
} else if (state == AUTHORITY_HOST) {
if (raw[cursor] == ':') {
uri->host = raw.substr(last, cursor - last);
last = cursor+1;
last = cursor + 1;
state = AUTHORITY_PORT;
} else if (raw[cursor] == '/') {
uri->host = raw.substr(last, cursor - last);
@@ -243,6 +238,18 @@ URL::URL(std::string const& url) {
construct(url);
}
void URL::setPort() {
/* TODO
* Put default port now (Should use a table for that but
* I don't know C++ enough) */
if (this->scheme == "http") {
this->port = 80;
}
if (this->scheme == "https") {
this->port = 443;
}
}
std::string URL::toString() const {
//std::cout << "scheme[" << scheme << "] host[" << host << "] path [" << path << "] query[" << query << "]" << std::endl;
if (isRelative()) {
@@ -264,6 +271,12 @@ bool URL::isRelative() const {
}
URL URL::merge(URL const& url) const {
if (url.scheme == "relative") {
URL returnURL = url.copy();
returnURL.scheme = this->scheme;
returnURL.setPort();
return returnURL;
}
if (!url.isRelative()) {
return url.copy();
}
@@ -271,7 +284,8 @@ URL URL::merge(URL const& url) const {
URL returnURL = copy();
//std::cout << "URL::merge - source " << url.toString() << std::endl;
//std::cout << "URL::merge - copy " << returnURL.toString() << std::endl;
// if path is //
if (url.path[0] == '/' && url.path[1] == '/') {
auto slashPos = url.path.find('/', 2);
returnURL.host = url.path.substr(2, slashPos - 2);
@@ -280,14 +294,15 @@ URL URL::merge(URL const& url) const {
} else {
returnURL.path = url.path.substr(slashPos);
}
} else if (url.path[0] == '/') {
} else if (url.path[0] == '/') { // is absolute path
returnURL.path = url.path;
// FIXME: make smarter about merging query strings
if (url.query !="") {
returnURL.query += "&"+url.query;
}
} else {
if (returnURL.path.back() != '/') {
if (returnURL.path.back() != '/') { // if doesn't end at /
// strip off up to last slash
auto finalSlashPos = returnURL.path.find_last_of('/');
returnURL.path.erase(finalSlashPos + 1);
}

1
src/URL.h

@@ -15,6 +15,7 @@ struct URL {
URL();
URL(std::string const& url);
void setPort();
std::string toString() const;
bool isRelative() const;
URL merge(URL const& url) const;

Loading…
Cancel
Save