Selaa lähdekoodia

Fixed an issue where loading large pages could cause the bot to get stuck.

Added timeout for large pages.
Added sequential reading for titles so that the entire page doesn't need to be downloaded.
Separated parsing for title to its own function.
tags/3.1.0
Teknikode 4 vuotta sitten
vanhempi
commit
6f2266a481
1 muutettua tiedostoa jossa 82 lisäystä ja 23 poistoa
  1. 82
    23
      Modules/Url Parsing/Url_Parsing.cs

+ 82
- 23
Modules/Url Parsing/Url_Parsing.cs Näytä tiedosto

@@ -1,4 +1,6 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
@@ -51,28 +53,6 @@ namespace Combot.Modules.Plugins
long contentLength = webResponse.ContentLength;
switch (contentType)
{
case "text":
Regex ytRegex = new Regex("(((youtube.*(v=|/v/))|(youtu\\.be/))(?<ID>[-_a-zA-Z0-9]+))");
if (ytRegex.IsMatch(urlMatch.ToString()))
{
Match ytMatch = ytRegex.Match(urlMatch.ToString());
string youtubeMessage = GetYoutubeDescription(ytMatch.Groups["ID"].Value);
Bot.IRC.Command.SendPrivateMessage(message.Channel, youtubeMessage);
}
else
{
WebClient x = new WebClient();
x.Encoding = Encoding.UTF8;
string source = x.DownloadString(urlMatch.ToString());
string title = Regex.Match(source, @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>", RegexOptions.IgnoreCase).Groups["Title"].Value;
int maxTitle = Convert.ToInt32(GetOptionValue("Max Title"));
if (title.Length > (int)maxTitle)
{
title = string.Format("{0}...", title.Substring(0, (int)maxTitle));
}
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] {0} ({1})", HttpUtility.HtmlDecode(HttpUtility.UrlDecode(StripTagsCharArray(title))), url.Host));
}
break;
case "image":
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[{0}] Size: {1}", webResponse.ContentType, ToFileSize(contentLength)));
break;
@@ -85,6 +65,19 @@ namespace Combot.Modules.Plugins
case "audio":
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[Audio] Type: {0} | Size: {1}", webResponse.ContentType.Split('/')[1], ToFileSize(contentLength)));
break;
default:
Regex ytRegex = new Regex("(((youtube.*(v=|/v/))|(youtu\\.be/))(?<ID>[-_a-zA-Z0-9]+))");
if (ytRegex.IsMatch(urlMatch.ToString()))
{
Match ytMatch = ytRegex.Match(urlMatch.ToString());
string youtubeMessage = GetYoutubeDescription(ytMatch.Groups["ID"].Value);
Bot.IRC.Command.SendPrivateMessage(message.Channel, youtubeMessage);
}
else
{
ParseTitle(message, urlMatch.ToString());
}
break;
}
}
else
@@ -103,13 +96,79 @@ namespace Combot.Modules.Plugins
}
catch (OutOfMemoryException ex)
{
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] Site content was too large ({0})", url.Host));
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] \u0002Site content was too large\u0002 ({0})", url.Host));
}
}
}
}
}

/// <summary>
/// Streams the page at <paramref name="urlString"/> in small chunks, extracts the
/// text of the first &lt;title&gt; element and, when it is not empty, posts it
/// (truncated to the "Max Title" option) to the channel the message came from.
/// Reading stops as soon as the closing tag is seen, the collected title is
/// already longer than "Max Title", the stream ends, or 15 seconds have elapsed,
/// so the whole page never has to be downloaded.
/// </summary>
/// <param name="message">Channel message that contained the URL; the reply is sent to its Channel.</param>
/// <param name="urlString">Absolute HTTP(S) URL of the page whose title should be reported.</param>
public void ParseTitle(ChannelMessage message, string urlString)
{
    const string openTagPattern = @"\<title\b[^>]*\>";
    const string closeTagPattern = @"\</title\>";
    // Match a chunk that ENDS in the middle of an opening / closing title tag, so the
    // unfinished tag can be carried over and re-examined together with the next chunk.
    const string partialOpenPattern = @"\<(?:title\b[^>]*|titl|tit|ti|t)?\z";
    const string partialClosePattern = @"\<(?:/title|/titl|/tit|/ti|/t|/)?\z";
    const int bufferSize = 256;
    // An "unfinished opening tag" longer than this is not a plausible tag; dropping it
    // keeps the carried-over text (and the regex work per chunk) bounded.
    const int maxCarryLength = 4096;

    Uri url = new Uri(urlString);
    TimeSpan timeout = TimeSpan.FromSeconds(15);

    StringBuilder titleBuilder = new StringBuilder();
    bool startTagFound = false;
    bool endTagFound = false;
    string carry = string.Empty;
    int maxTitle = 0;

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(urlString);

    // Dispose the response and reader even when reading or matching throws,
    // otherwise the underlying connection is leaked.
    using (WebResponse response = req.GetResponse())
    using (StreamReader streamReader = new StreamReader(response.GetResponseStream()))
    {
        char[] buf = new char[bufferSize];

        // Start timing before the first read so that it counts towards the timeout.
        Stopwatch stopwatch = Stopwatch.StartNew();
        while (stopwatch.Elapsed < timeout)
        {
            int count = streamReader.Read(buf, 0, buf.Length);
            if (count <= 0)
            {
                break;
            }

            // Prepend whatever unfinished tag the previous chunk ended with.
            string text = carry + new string(buf, 0, count);
            carry = string.Empty;

            if (!startTagFound)
            {
                Match openTag = Regex.Match(text, openTagPattern, RegexOptions.IgnoreCase);
                if (!openTag.Success)
                {
                    Match partialOpen = Regex.Match(text, partialOpenPattern, RegexOptions.IgnoreCase);
                    if (partialOpen.Success && partialOpen.Length <= maxCarryLength)
                    {
                        carry = partialOpen.Value;
                    }
                    continue;
                }

                startTagFound = true;
                maxTitle = Convert.ToInt32(GetOptionValue("Max Title"));
                text = text.Substring(openTag.Index + openTag.Length);
            }

            // Skip whitespace between the opening tag and the first title character,
            // even when that whitespace spans several chunks.
            if (titleBuilder.Length == 0)
            {
                text = text.TrimStart();
            }

            Match closeTag = Regex.Match(text, closeTagPattern, RegexOptions.IgnoreCase);
            if (closeTag.Success)
            {
                titleBuilder.Append(text, 0, closeTag.Index);
                endTagFound = true;
                break;
            }

            // Hold back a trailing fragment that may be the beginning of "</title>".
            Match partialClose = Regex.Match(text, partialClosePattern, RegexOptions.IgnoreCase);
            int keep = partialClose.Success ? partialClose.Length : 0;
            titleBuilder.Append(text, 0, text.Length - keep);
            carry = text.Substring(text.Length - keep);

            // Only the first maxTitle characters are ever shown, so further reading
            // cannot change the output once the title is longer than that.
            if (titleBuilder.Length > maxTitle)
            {
                break;
            }
        }
    }

    // The title was never closed: the held-back fragment was ordinary title text.
    if (startTagFound && !endTagFound)
    {
        titleBuilder.Append(carry);
    }

    string title = titleBuilder.ToString();
    if (!string.IsNullOrEmpty(title))
    {
        if (title.Length > maxTitle)
        {
            title = string.Format("{0}...", title.Substring(0, maxTitle));
        }
        Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] {0} ({1})", HttpUtility.HtmlDecode(HttpUtility.UrlDecode(StripTagsCharArray(title))), url.Host));
    }
}

/// <summary>
/// Remove HTML tags from string using char array.
/// </summary>

Loading…
Peruuta
Tallenna