using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using HtmlAgilityPack;

namespace BukkuBuddy.Services
{
    /// <summary>
    /// Downloads web pages and images over HTTP and extracts title, description and
    /// favicon information from the parsed HTML.
    /// </summary>
    public class WebPageService : IDisposable
    {
        // Separators used to split a space-separated token list such as a "rel" attribute.
        private static readonly char[] TokenSeparators = { ' ', '\t', '\r', '\n', '\f' };

        // XPath queries tried in order when the <title> element yields nothing.
        private static readonly string[] TitleMetaPatterns =
        {
            "//meta[@property='og:title']",
            "//meta[@property='og:site_name']",
            "//meta[@name='twitter:title']",
            "//meta[@itemprop='name']",
        };

        // XPath queries tried in order to find a page description.
        private static readonly string[] DescriptionMetaPatterns =
        {
            "//meta[@name='description']",
            "//meta[@property='og:description']",
            "//meta[@name='twitter:description']",
            "//meta[@itemprop='description']",
        };

        // Exact "rel" values accepted as an Apple touch icon.
        private static readonly string[] AppleIconRelValues =
        {
            "apple-touch-icon",
            "apple-touch-icon-precomposed",
        };

        // XPath queries tried in order when no <link> icon is found.
        private static readonly string[] ImageMetaPatterns =
        {
            "//meta[@property='og:image']",
            "//meta[@name='twitter:image']",
            "//meta[@itemprop='image']",
        };

        private readonly HttpClient _httpClient;

        /// <summary>
        /// Creates the service and the <see cref="HttpClient"/> it uses for every request.
        /// </summary>
        /// <param name="allowUnsafeSSL">When true, every server certificate is accepted without validation.</param>
        /// <param name="timeout">Request timeout in seconds; passed to <see cref="TimeSpan.FromSeconds(double)"/>.</param>
        /// <param name="allowCookies">When true, cookies are stored in a fresh <see cref="CookieContainer"/>; when false, cookie handling is turned off.</param>
        /// <param name="allowRedirect">Whether the handler follows redirects automatically.</param>
        public WebPageService(bool allowUnsafeSSL, int timeout, bool allowCookies, bool allowRedirect)
        {
            var handler = new HttpClientHandler
            {
                AllowAutoRedirect = allowRedirect,
                // Assigned unconditionally: the handler's default is true, so leaving it
                // untouched would keep cookies enabled even when the caller asked for none.
                UseCookies = allowCookies,
            };

            if (allowUnsafeSSL)
            {
                handler.ServerCertificateCustomValidationCallback =
                    HttpClientHandler.DangerousAcceptAnyServerCertificateValidator;
            }

            if (allowCookies)
            {
                handler.CookieContainer = new CookieContainer();
            }

            _httpClient = new HttpClient(handler)
            {
                Timeout = TimeSpan.FromSeconds(timeout),
            };
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and parses the response body as HTML.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="cancellationToken">Token used to cancel the request.</param>
        /// <returns>The parsed document, or null when no non-blank source could be retrieved.</returns>
        public async Task<HtmlAgilityPack.HtmlDocument> GetDocument(string url, CancellationToken cancellationToken = default)
        {
            var sourceCode = await GetSource(url, cancellationToken);
            if (string.IsNullOrWhiteSpace(sourceCode))
            {
                return null;
            }

            var document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(sourceCode);
            return document;
        }

        /// <summary>
        /// Sends a GET request to <paramref name="url"/> and returns the response body as text.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="cancellationToken">Token used to cancel the request.</param>
        /// <returns>
        /// The response body, or null when <paramref name="url"/> is blank or the response
        /// status is not a success code. Exceptions raised while sending are not caught here.
        /// </returns>
        public async Task<string> GetSource(string url, CancellationToken cancellationToken = default)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                return null;
            }

            using var request = new HttpRequestMessage(HttpMethod.Get, url);
            request.Headers.UserAgent.ParseAdd(GenerateUserAgent());

            using var response = await _httpClient.SendAsync(request, cancellationToken);
            if (!response.IsSuccessStatusCode)
            {
                return null;
            }

            return await response.Content.ReadAsStringAsync(cancellationToken);
        }

        /// <summary>
        /// Downloads the image at <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Address of the image.</param>
        /// <param name="cancellationToken">Token used to cancel the request.</param>
        /// <returns>
        /// A bitmap copy of the downloaded image, or null when the status is not a success
        /// code, the content type does not start with "image/", or any exception occurs.
        /// </returns>
        public async Task<Bitmap> GetImage(string url, CancellationToken cancellationToken = default)
        {
            try
            {
                using var request = new HttpRequestMessage(HttpMethod.Get, url);
                request.Headers.UserAgent.ParseAdd(GenerateUserAgent());

                // Read the headers first so a non-image response is rejected before its body is downloaded.
                using var response = await _httpClient.SendAsync(
                    request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
                if (!response.IsSuccessStatusCode)
                {
                    return null;
                }

                var contentType = response.Content.Headers.ContentType?.MediaType;
                if (contentType == null || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                {
                    return null;
                }

                await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
                using var memoryStream = new MemoryStream();
                await stream.CopyToAsync(memoryStream, cancellationToken);
                memoryStream.Position = 0;

                // The decoded image is only an intermediate: the returned Bitmap is a separate
                // copy, so the intermediate and the buffer can both be released here.
                using var image = Image.FromStream(memoryStream);
                return new Bitmap(image);
            }
            catch (Exception)
            {
                // Best effort by design: every failure (including cancellation) yields "no image".
                return null;
            }
        }

        /// <summary>
        /// Extracts the page title from the &lt;title&gt; element, falling back to
        /// Open Graph, Twitter and itemprop meta tags.
        /// </summary>
        /// <param name="document">Parsed HTML document.</param>
        /// <returns>The trimmed title, or null when none is found.</returns>
        public string ParseTitle(HtmlAgilityPack.HtmlDocument document)
        {
            // Find basic title
            var title = FindNodeValue(document, "//title", string.Empty)?.Trim();
            if (!string.IsNullOrWhiteSpace(title))
            {
                return title;
            }

            // Find title from extended meta
            return FirstMetaContent(document, TitleMetaPatterns);
        }

        /// <summary>
        /// Extracts the page description from standard, Open Graph, Twitter and itemprop meta tags.
        /// </summary>
        /// <param name="document">Parsed HTML document.</param>
        /// <returns>The trimmed description, or an empty string when none is found.</returns>
        public string ParseDescription(HtmlAgilityPack.HtmlDocument document)
        {
            return FirstMetaContent(document, DescriptionMetaPatterns) ?? string.Empty;
        }

        /// <summary>
        /// Extracts an icon address for the page: first a &lt;link&gt; whose rel contains the
        /// word "icon", then an Apple touch icon link, then image meta tags.
        /// </summary>
        /// <param name="document">Parsed HTML document.</param>
        /// <returns>The address as written in the document, or an empty string when none is found.</returns>
        public string ParseFavicon(HtmlAgilityPack.HtmlDocument document)
        {
            // Find link-rel that contains word
            var result = FindNodeAttrValue_ContainsWord(document, "link", "rel", "href", "icon");
            if (!string.IsNullOrWhiteSpace(result))
            {
                return result;
            }

            // Find link-rel contains apple-icon
            result = FindNodeAttrValue_Equals(document, "link", "rel", "href", AppleIconRelValues);
            if (!string.IsNullOrWhiteSpace(result))
            {
                return result;
            }

            // Find favicon from extended meta
            return FirstMetaContent(document, ImageMetaPatterns) ?? string.Empty;
        }

        /// <summary>
        /// Checks that <paramref name="url"/> is non-blank and starts with "http://" or
        /// "https://" (case-insensitive). No further validation is performed.
        /// </summary>
        /// <param name="url">Text to check.</param>
        /// <returns>True when the prefix check passes; otherwise false.</returns>
        public bool IsValidUrl(string url)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                return false;
            }

            // Ordinal comparison: URL schemes are ASCII identifiers, not linguistic text.
            return url.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Releases the underlying <see cref="HttpClient"/>.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Releases the resources held by this instance.
        /// </summary>
        /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _httpClient.Dispose();
            }
        }

        // Returns the trimmed "content" attribute of the first pattern that yields a
        // non-blank value, or null when none of the patterns does.
        private string FirstMetaContent(HtmlAgilityPack.HtmlDocument document, IEnumerable<string> xPaths)
        {
            foreach (var xPath in xPaths)
            {
                var content = FindNodeAttrValue(document, xPath, "content", string.Empty)?.Trim();
                if (!string.IsNullOrWhiteSpace(content))
                {
                    return content;
                }
            }

            return null;
        }

        // Returns the HTML-decoded inner HTML of the first node matching xPath whose content
        // is non-blank, with "\r" removed and "\n" replaced by a space; otherwise defaultValue.
        private string FindNodeValue(HtmlAgilityPack.HtmlDocument document, string xPath, string defaultValue = "")
        {
            var nodes = document.DocumentNode.SelectNodes(xPath);
            if (nodes == null)
            {
                return defaultValue;
            }

            foreach (HtmlNode node in nodes)
            {
                if (string.IsNullOrWhiteSpace(node.InnerHtml))
                {
                    continue;
                }

                var text = WebUtility.HtmlDecode(node.InnerHtml)?.Replace("\r", "").Replace("\n", " ").Trim();
                if (!string.IsNullOrWhiteSpace(text))
                {
                    return text;
                }
            }

            return defaultValue;
        }

        // Returns the trimmed, HTML-decoded value of attribute attr on the first node matching
        // xPath that carries a non-blank value for it; otherwise defaultValue.
        private string FindNodeAttrValue(HtmlAgilityPack.HtmlDocument document, string xPath, string attr, string defaultValue = "")
        {
            var nodes = document.DocumentNode.SelectNodes(xPath);
            if (nodes == null)
            {
                return defaultValue;
            }

            foreach (HtmlNode node in nodes)
            {
                var value = node.Attributes[attr]?.Value;
                if (string.IsNullOrWhiteSpace(value))
                {
                    continue;
                }

                return WebUtility.HtmlDecode(value.Trim());
            }

            return defaultValue;
        }

        // Returns every nodeName element; when attrName is non-blank, only those that carry
        // that attribute. Never returns null.
        private List<HtmlNode> FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName)
        {
            var xPath = string.IsNullOrWhiteSpace(attrName)
                ? $"//{nodeName}"
                : $"//{nodeName}[@{attrName}]";

            var nodes = document.DocumentNode.SelectNodes(xPath);
            return nodes?.ToList() ?? new List<HtmlNode>();
        }

        // Returns the decoded returnAttrName value of the first nodeName element whose attrName
        // value contains matchWord as a whole token; otherwise defaultValue.
        private string FindNodeAttrValue_ContainsWord(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, string matchWord, string defaultValue = "")
        {
            foreach (var node in FindNode(document, nodeName, attrName))
            {
                var candidate = node.Attributes[attrName]?.Value?.Trim() ?? string.Empty;
                if (!ContainsWord(candidate, matchWord))
                {
                    continue;
                }

                // The returned attribute may be missing altogether (e.g. <link rel="icon">
                // without href); such a node is skipped instead of dereferencing null.
                var found = node.Attributes[returnAttrName]?.Value?.Trim() ?? string.Empty;
                if (string.IsNullOrWhiteSpace(found))
                {
                    continue;
                }

                return WebUtility.HtmlDecode(found);
            }

            return defaultValue;
        }

        // Returns the decoded returnAttrName value of the first nodeName element whose trimmed
        // attrName value equals one of matchValueList (ignoring case); otherwise defaultValue.
        private string FindNodeAttrValue_Equals(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, IEnumerable<string> matchValueList, string defaultValue = "")
        {
            foreach (var node in FindNode(document, nodeName, attrName))
            {
                var candidate = node.Attributes[attrName]?.Value?.Trim() ?? string.Empty;

                // Ordinal, case-insensitive match: avoids culture-dependent lower-casing.
                if (!matchValueList.Contains(candidate, StringComparer.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Skip nodes that lack the returned attribute instead of dereferencing null.
                var found = node.Attributes[returnAttrName]?.Value?.Trim() ?? string.Empty;
                if (string.IsNullOrWhiteSpace(found))
                {
                    continue;
                }

                return WebUtility.HtmlDecode(found);
            }

            return defaultValue;
        }

        // True when needle equals, ignoring case, one of the whitespace-separated tokens of haystack.
        private bool ContainsWord(string haystack, string needle)
        {
            if (string.IsNullOrWhiteSpace(haystack))
            {
                return false;
            }

            return haystack
                .Split(TokenSeparators, StringSplitOptions.RemoveEmptyEntries)
                .Any(token => token.Equals(needle, StringComparison.OrdinalIgnoreCase));
        }

        // Builds a Chrome-style User-Agent string from the OS version, the OS architecture
        // and a randomized Chrome version number.
        private string GenerateUserAgent()
        {
            var os = GetWindowsVersion();
            var arch = RuntimeInformation.OSArchitecture switch
            {
                Architecture.X64 => "Win64; x64",
                Architecture.X86 => "Win32",
                Architecture.Arm64 => "ARM64",
                _ => "Win64; x64",
            };
            var chromeVersion = GetChromeLikeVersion();

            return $"Mozilla/5.0 (Windows NT {os}; {arch}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{chromeVersion} Safari/537.36";
        }

        // Formats Environment.OSVersion as the "major.minor" text used in the User-Agent.
        private string GetWindowsVersion()
        {
            var v = Environment.OSVersion.Version;

            // Map to common Windows NT versions
            return v.Major switch
            {
                10 => "10.0",                // Windows 10/11 both report 10.0
                6 when v.Minor == 3 => "6.3", // Windows 8.1
                6 when v.Minor == 2 => "6.2", // Windows 8
                6 when v.Minor == 1 => "6.1", // Windows 7
                _ => $"{v.Major}.{v.Minor}",
            };
        }

        // Produces a random version string of the form "major.0.build.patch".
        private string GetChromeLikeVersion()
        {
            // You can hardcode or randomize within a realistic range
            var rnd = Random.Shared;
            int major = rnd.Next(120, 126); // recent Chrome versions
            int build = rnd.Next(0, 7000);
            int patch = rnd.Next(0, 200);

            return $"{major}.0.{build}.{patch}";
        }
    }
}