From b4f236266a79c667d143e57e10736d508d68e347 Mon Sep 17 00:00:00 2001 From: Ray Date: Sun, 15 Sep 2024 12:48:31 +0100 Subject: [PATCH] Refactored WebProvider for clarity --- EditBookmarkForm.cs | 2 +- Services/WebProvider.cs | 260 ++++++++++++++++------------------------ 2 files changed, 103 insertions(+), 159 deletions(-) diff --git a/EditBookmarkForm.cs b/EditBookmarkForm.cs index 19b1e89..9cc8365 100644 --- a/EditBookmarkForm.cs +++ b/EditBookmarkForm.cs @@ -408,7 +408,7 @@ namespace FizzyLauncher if (updateDescription) { - textBox3.Text = _webProvider.ParseMetaDescription(document); + textBox3.Text = _webProvider.ParseDescription(document); } if (updateIcon) diff --git a/Services/WebProvider.cs b/Services/WebProvider.cs index 602364d..710c0dc 100644 --- a/Services/WebProvider.cs +++ b/Services/WebProvider.cs @@ -3,7 +3,6 @@ using System.Collections.Generic; using System.Drawing; using System.Linq; using System.Net; -using System.Security.Policy; using System.Threading.Tasks; using HtmlAgilityPack; using RyzStudio.Net; @@ -178,168 +177,102 @@ namespace BookmarkManager.Services { string result = null; - result = ParseTagValue(document, "//title", string.Empty)?.Trim(); + // Find basic title + result = FindNodeValue(document, "//title", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = FindNodeAttrValue(document, "//meta[@property='og:title']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) + // Find title from extended meta + var patternList = new List() { - return result; - } + "//meta[@property='og:title']", + "//meta[@property='og:site_name']", + "//meta[@name='twitter:title']", + "//meta[@itemprop='name']" + }; - result = FindNodeAttrValue(document, "//meta[@name='twitter:title']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) + foreach (var item in patternList) { - return result; - } + result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim(); + if (string.IsNullOrWhiteSpace(result)) + { + continue; + } - result = FindNodeAttrValue(document, "//meta[@property='og:site_name']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = FindNodeAttrValue(document, "//meta[@itemprop='name']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { return result; } return string.Empty; } - public string ParseMetaDescription(HtmlAgilityPack.HtmlDocument document) + public string ParseDescription(HtmlAgilityPack.HtmlDocument document) { - string result = null; - - result = FindNodeAttrValue(document, "//meta[@name='description']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) + var patternList = new List() { + "//meta[@name='description']", + "//meta[@property='og:description']", + "//meta[@name='twitter:description']", + "//meta[@itemprop='description']", + }; + + foreach (var item in patternList) + { + var result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim(); + if (string.IsNullOrWhiteSpace(result)) + { + continue; + } + return result; } - result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = FindNodeAttrValue(document, "//meta[@name='twitter:description']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = FindNodeAttrValue(document, "//meta[@itemprop='description']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - return result; + return string.Empty; } public string ParseFavicon(HtmlAgilityPack.HtmlDocument document) { string result = null; - //var tt1 = FindNode_AtrributeContains(document, "//link[contains(@rel, 'icon')]", "href", string.Empty); - //var tt1 = FindNode_AtrributeContains(document, "link", "rel", "icon"); - - // Find link-rel contains "icon" - var linkNodes = FindNode(document, "link", "rel"); - foreach (var item in linkNodes) + // Find link-rel that contains word + result = FindNodeAttrValue_ContainsWord(document, "link", "rel", "href", "icon"); + if (!string.IsNullOrWhiteSpace(result)) { - var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty; - if (!ContainsWord(relValue, "icon")) - { - continue; - } - - var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty; - if (string.IsNullOrWhiteSpace(hrefValue)) - { - continue; - } - - return System.Web.HttpUtility.HtmlDecode(hrefValue); + return result; } // Find link-rel contains apple-icon - var appleIconPatterns = new List() { "apple-touch-icon", "apple-touch-icon-precomposed" }; - - foreach (var item in linkNodes) + var matchPatterns = new List() { "apple-touch-icon", "apple-touch-icon-precomposed" }; + result = FindNodeAttrValue_Equals(document, "link", "rel", "href", matchPatterns); + if (!string.IsNullOrWhiteSpace(result)) { - var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty; - if (!appleIconPatterns.Contains(relValue?.ToLower() ?? string.Empty)) + return result; + } + + // Find favicon from extended meta + var patternList = new List() + { + "//meta[@property='og:image']", + "//meta[@name='twitter:image']", + "//meta[@itemprop='image']" + }; + + foreach (var item in patternList) + { + result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim(); + if (string.IsNullOrWhiteSpace(result)) { continue; } - var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty; - if (string.IsNullOrWhiteSpace(hrefValue)) - { - continue; - } - - return System.Web.HttpUtility.HtmlDecode(hrefValue); - } - - //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'shortcut icon']", "href", string.Empty)?.Trim(); - //if (!string.IsNullOrWhiteSpace(result)) - //{ - // return result; - //} - - //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'icon']", "href", string.Empty)?.Trim(); - //if (!string.IsNullOrWhiteSpace(result)) - //{ - // return result; - //} - - //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon']", "href", string.Empty)?.Trim(); - //if (!string.IsNullOrWhiteSpace(result)) - //{ - // return result; - //} - - //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon-precomposed']", "href", string.Empty)?.Trim(); - //if (!string.IsNullOrWhiteSpace(result)) - //{ - // return result; - //} - - result = FindNodeAttrValue(document, "//meta[@property='og:image']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { return result; } - result = FindNodeAttrValue(document, "//meta[@name='twitter:image']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = FindNodeAttrValue(document, "//meta[@itemprop='image']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - return "/favicon.ico"; + return string.Empty; } - private string ParseTagValue(HtmlAgilityPack.HtmlDocument document, string xPath, string defaultValue = "") + private string FindNodeValue(HtmlAgilityPack.HtmlDocument document, string xPath, string defaultValue = "") { var hnc = document.DocumentNode.SelectNodes(xPath); if (hnc == null) @@ -402,40 +335,6 @@ namespace BookmarkManager.Services return defaultValue; } - //private List FindNode_AtrributeContains(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string findValue) - //{ - // var response = new List(); - - // var xPath = $"//{nodeName}[@{attrName}]"; - // var hnc = document.DocumentNode.SelectNodes(xPath); - // if (hnc == null) - // { - // return response; - // } - - // if (hnc.Count <= 0) - // { - // return response; - // } - - // foreach (HtmlNode item in hnc) - // { - // if (!item.Attributes.Contains(attrName)) - // { - // continue; - // } - - // if (!ContainsWord(item.Attributes[attrName].Value ?? string.Empty, findValue)) - // { - // continue; - // } - - // response.Add(item); - // } - - // return response; - //} - private List FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName) { var xPath = (string.IsNullOrWhiteSpace(attrName) ? $"//{nodeName}" : $"//{nodeName}[@{attrName}]"); @@ -453,6 +352,52 @@ namespace BookmarkManager.Services return hnc.ToList(); } + private string FindNodeAttrValue_ContainsWord(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, string matchEqualList, string defaultValue = "") + { + var linkNodes = FindNode(document, nodeName, attrName); + foreach (var item in linkNodes) + { + var relValue = item.Attributes[attrName].Value?.Trim() ?? string.Empty; + if (!ContainsWord(relValue, matchEqualList)) + { + continue; + } + + var hrefValue = item.Attributes[returnAttrName].Value?.Trim() ?? string.Empty; + if (string.IsNullOrWhiteSpace(hrefValue)) + { + continue; + } + + return System.Web.HttpUtility.HtmlDecode(hrefValue); + } + + return defaultValue; + } + + private string FindNodeAttrValue_Equals(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, List matchValueList, string defaultValue = "") + { + var linkNodes = FindNode(document, nodeName, attrName); + foreach (var item in linkNodes) + { + var relValue = item.Attributes[attrName].Value?.Trim() ?? string.Empty; + if (!matchValueList.Contains(relValue?.ToLower() ?? string.Empty)) + { + continue; + } + + var hrefValue = item.Attributes[returnAttrName].Value?.Trim() ?? string.Empty; + if (string.IsNullOrWhiteSpace(hrefValue)) + { + continue; + } + + return System.Web.HttpUtility.HtmlDecode(hrefValue); + } + + return defaultValue; + } + private bool ContainsWord(string haystack, string needle) { haystack = haystack?.Trim() ?? string.Empty; @@ -478,6 +423,5 @@ namespace BookmarkManager.Services return false; } - } } \ No newline at end of file