浏览代码

Fixed an issue where loading large pages could cause the bot to get stuck.

Added timeout for large pages.
Added sequential reading for titles so that the entire page doesn't need to be downloaded.
Separated title parsing into its own function.
tags/3.1.0
Teknikode 4 年前
父节点
当前提交
6f2266a481
共有 1 个文件被更改,包括 82 次插入23 次删除
  1. 82
    23
      Modules/Url Parsing/Url_Parsing.cs

+ 82
- 23
Modules/Url Parsing/Url_Parsing.cs 查看文件

@@ -1,4 +1,6 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
@@ -51,28 +53,6 @@ namespace Combot.Modules.Plugins
long contentLength = webResponse.ContentLength;
switch (contentType)
{
case "text":
Regex ytRegex = new Regex("(((youtube.*(v=|/v/))|(youtu\\.be/))(?<ID>[-_a-zA-Z0-9]+))");
if (ytRegex.IsMatch(urlMatch.ToString()))
{
Match ytMatch = ytRegex.Match(urlMatch.ToString());
string youtubeMessage = GetYoutubeDescription(ytMatch.Groups["ID"].Value);
Bot.IRC.Command.SendPrivateMessage(message.Channel, youtubeMessage);
}
else
{
WebClient x = new WebClient();
x.Encoding = Encoding.UTF8;
string source = x.DownloadString(urlMatch.ToString());
string title = Regex.Match(source, @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>", RegexOptions.IgnoreCase).Groups["Title"].Value;
int maxTitle = Convert.ToInt32(GetOptionValue("Max Title"));
if (title.Length > (int)maxTitle)
{
title = string.Format("{0}...", title.Substring(0, (int)maxTitle));
}
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] {0} ({1})", HttpUtility.HtmlDecode(HttpUtility.UrlDecode(StripTagsCharArray(title))), url.Host));
}
break;
case "image":
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[{0}] Size: {1}", webResponse.ContentType, ToFileSize(contentLength)));
break;
@@ -85,6 +65,19 @@ namespace Combot.Modules.Plugins
case "audio":
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[Audio] Type: {0} | Size: {1}", webResponse.ContentType.Split('/')[1], ToFileSize(contentLength)));
break;
default:
Regex ytRegex = new Regex("(((youtube.*(v=|/v/))|(youtu\\.be/))(?<ID>[-_a-zA-Z0-9]+))");
if (ytRegex.IsMatch(urlMatch.ToString()))
{
Match ytMatch = ytRegex.Match(urlMatch.ToString());
string youtubeMessage = GetYoutubeDescription(ytMatch.Groups["ID"].Value);
Bot.IRC.Command.SendPrivateMessage(message.Channel, youtubeMessage);
}
else
{
ParseTitle(message, urlMatch.ToString());
}
break;
}
}
else
@@ -103,13 +96,79 @@ namespace Combot.Modules.Plugins
}
catch (OutOfMemoryException ex)
{
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] Site content was too large ({0})", url.Host));
Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] \u0002Site content was too large\u0002 ({0})", url.Host));
}
}
}
}
}

/// <summary>
/// Streams the page at <paramref name="urlString"/> in small chunks, extracts the
/// contents of its HTML <c>&lt;title&gt;</c> element and announces it to the channel
/// the triggering message came from. Reading stops as soon as the closing tag is
/// seen, so large pages are not downloaded in full.
/// </summary>
/// <param name="message">Channel message that contained the URL; its Channel receives the reply.</param>
/// <param name="urlString">Absolute URL of the page to inspect.</param>
/// <remarks>
/// Nothing is sent when no title text could be extracted. Network failures
/// (including the timeouts configured here) surface as exceptions from the
/// underlying request and are left for the caller to handle.
/// </remarks>
public void ParseTitle(ChannelMessage message, string urlString)
{
    const int bufferSize = 256;
    // Upper bound on how much of the page is inspected; keeps memory bounded when
    // a page never closes (or never opens) its title element.
    const int maxScannedChars = 64 * 1024;
    TimeSpan timeout = TimeSpan.FromSeconds(15);

    Uri url = new Uri(urlString);

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(urlString);
    // The stopwatch below is only consulted between reads, so the blocking calls
    // themselves must be bounded as well or a stalled server still hangs us.
    req.Timeout = (int)timeout.TotalMilliseconds;
    req.ReadWriteTimeout = (int)timeout.TotalMilliseconds;

    Regex fullTitle = new Regex(@"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>", RegexOptions.IgnoreCase);
    Regex openTitle = new Regex(@"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)$", RegexOptions.IgnoreCase);

    string title = string.Empty;
    bool closingTagFound = false;
    StringBuilder scanned = new StringBuilder();

    using (WebResponse response = req.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(responseStream))
    {
        char[] buf = new char[bufferSize];
        Stopwatch stopwatch = Stopwatch.StartNew();
        int count;

        while (scanned.Length < maxScannedChars
               && stopwatch.Elapsed < timeout
               && (count = streamReader.Read(buf, 0, buf.Length)) > 0)
        {
            // Match against everything read so far rather than the current chunk
            // alone, so a tag split across two reads is still recognised.
            scanned.Append(buf, 0, count);

            Match fullMatch = fullTitle.Match(scanned.ToString());
            if (fullMatch.Success)
            {
                title = fullMatch.Groups["Title"].Value;
                closingTagFound = true;
                break;
            }
        }

        // We usually stop long before the end of the body; abort the request so
        // disposing the response does not try to drain the remainder of the page.
        req.Abort();
    }

    if (!closingTagFound)
    {
        // Same fallback as before: if the title was opened but never closed within
        // the inspected window, use whatever followed the opening tag.
        Match partialMatch = openTitle.Match(scanned.ToString());
        if (partialMatch.Success)
        {
            title = partialMatch.Groups["Title"].Value;
        }
    }

    if (string.IsNullOrEmpty(title))
    {
        return;
    }

    int maxTitle = Convert.ToInt32(GetOptionValue("Max Title"));
    if (title.Length > maxTitle)
    {
        title = string.Format("{0}...", title.Substring(0, maxTitle));
    }

    Bot.IRC.Command.SendPrivateMessage(message.Channel, string.Format("[URL] {0} ({1})", HttpUtility.HtmlDecode(HttpUtility.UrlDecode(StripTagsCharArray(title))), url.Host));
}

/// <summary>
/// Remove HTML tags from string using char array.
/// </summary>

正在加载...
取消
保存