From 5f2aeb78bffab698f5dfad8aed6fe9aba002ee31 Mon Sep 17 00:00:00 2001 From: Ray Date: Sat, 14 Sep 2024 23:03:06 +0100 Subject: [PATCH] Changed to improve favicon detection --- BookmarkManager.csproj | 2 +- EditBookmarkForm.cs | 3 +- Services/WebProvider.cs | 216 ++++++++++++++++++++++++++++++++-------- UpdateIconsForm.cs | 2 +- build-installer.iss | 2 +- 5 files changed, 177 insertions(+), 48 deletions(-) diff --git a/BookmarkManager.csproj b/BookmarkManager.csproj index 5cefae5..f7a4438 100644 --- a/BookmarkManager.csproj +++ b/BookmarkManager.csproj @@ -14,7 +14,7 @@ Ray Lam 1.0.0.0 1.0.0.0 - 0.6.1.0249 + 0.6.2.092 bukkubuddy True 8.0 diff --git a/EditBookmarkForm.cs b/EditBookmarkForm.cs index 08d53f3..19b1e89 100644 --- a/EditBookmarkForm.cs +++ b/EditBookmarkForm.cs @@ -3,6 +3,7 @@ using System.ComponentModel; using System.Drawing; using System.Threading.Tasks; using System.Windows.Forms; +using System.Xml; using BookmarkManager.Services; using bzit.bomg.Models; using RyzStudio.Windows.Forms; @@ -414,7 +415,7 @@ namespace FizzyLauncher { try { - pictureBox1.Image = await _webProvider.RetrieveImage(document); + pictureBox1.Image = await _webProvider.RetrieveImage(url, document); if (pictureBox1.Image != null) { if (pictureBox1.Image.Width > 16) diff --git a/Services/WebProvider.cs b/Services/WebProvider.cs index c592dd3..602364d 100644 --- a/Services/WebProvider.cs +++ b/Services/WebProvider.cs @@ -1,6 +1,9 @@ using System; +using System.Collections.Generic; using System.Drawing; +using System.Linq; using System.Net; +using System.Security.Policy; using System.Threading.Tasks; using HtmlAgilityPack; using RyzStudio.Net; @@ -148,7 +151,7 @@ namespace BookmarkManager.Services return result; } - public async Task RetrieveImage(HtmlAgilityPack.HtmlDocument document) + public async Task RetrieveImage(string url, HtmlAgilityPack.HtmlDocument document) { var iconUrl = this.ParseFavicon(document); if (string.IsNullOrWhiteSpace(iconUrl)) @@ -156,6 +159,18 @@ namespace BookmarkManager.Services return null; } + try + { + var baseUri = new Uri(url); + var absoluteUri = new Uri(baseUri, iconUrl); + + iconUrl = absoluteUri.AbsoluteUri; + } + catch + { + return null; + } + return await this.RetrieveImage(iconUrl); } @@ -169,25 +184,25 @@ namespace BookmarkManager.Services return result; } - result = ParseTagValue_Attr(document, "//meta[@property='og:title']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@property='og:title']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//meta[@name='twitter:title']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@name='twitter:title']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//meta[@property='og:site_name']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@property='og:site_name']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//meta[@itemprop='name']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@itemprop='name']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; @@ -200,31 +215,31 @@ namespace BookmarkManager.Services { string result = null; - result = ParseTagValue_Attr(document, "//meta[@name='description']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@name='description']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//meta[@name='twitter:description']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@name='twitter:description']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//meta[@itemprop='description']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@itemprop='description']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; @@ -237,49 +252,85 @@ namespace BookmarkManager.Services { string result = null; - result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'shortcut icon']", "href", string.Empty)?.Trim(); + //var tt1 = FindNode_AtrributeContains(document, "//link[contains(@rel, 'icon')]", "href", string.Empty); + //var tt1 = FindNode_AtrributeContains(document, "link", "rel", "icon"); + + // Find link-rel contains "icon" + var linkNodes = FindNode(document, "link", "rel"); + foreach (var item in linkNodes) + { + var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty; + if (!ContainsWord(relValue, "icon")) + { + continue; + } + + var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty; + if (string.IsNullOrWhiteSpace(hrefValue)) + { + continue; + } + + return System.Web.HttpUtility.HtmlDecode(hrefValue); + } + + // Find link-rel contains apple-icon + var appleIconPatterns = new List() { "apple-touch-icon", "apple-touch-icon-precomposed" }; + + foreach (var item in linkNodes) + { + var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty; + if (!appleIconPatterns.Contains(relValue?.ToLower() ?? string.Empty)) + { + continue; + } + + var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty; + if (string.IsNullOrWhiteSpace(hrefValue)) + { + continue; + } + + return System.Web.HttpUtility.HtmlDecode(hrefValue); + } + + //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'shortcut icon']", "href", string.Empty)?.Trim(); + //if (!string.IsNullOrWhiteSpace(result)) + //{ + // return result; + //} + + //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'icon']", "href", string.Empty)?.Trim(); + //if (!string.IsNullOrWhiteSpace(result)) + //{ + // return result; + //} + + //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon']", "href", string.Empty)?.Trim(); + //if (!string.IsNullOrWhiteSpace(result)) + //{ + // return result; + //} + + //result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon-precomposed']", "href", string.Empty)?.Trim(); + //if (!string.IsNullOrWhiteSpace(result)) + //{ + // return result; + //} + + result = FindNodeAttrValue(document, "//meta[@property='og:image']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'icon']", "href", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@name='twitter:image']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; } - result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon']", "href", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon-precomposed']", "href", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = ParseTagValue_Attr(document, "//meta[translate(@property, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'og:image']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = ParseTagValue_Attr(document, "//meta[translate(@name, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'twitter:image']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = ParseTagValue_Attr(document, "//meta[translate(@property, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'og:image']", "content", string.Empty)?.Trim(); - if (!string.IsNullOrWhiteSpace(result)) - { - return result; - } - - result = ParseTagValue_Attr(document, "//meta[translate(@itemprop, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'image']", "content", string.Empty)?.Trim(); + result = FindNodeAttrValue(document, "//meta[@itemprop='image']", "content", string.Empty)?.Trim(); if (!string.IsNullOrWhiteSpace(result)) { return result; @@ -320,7 +371,7 @@ namespace BookmarkManager.Services return defaultValue; } - private string ParseTagValue_Attr(HtmlAgilityPack.HtmlDocument document, string xPath, string attr, string defaultValue = "") + private string FindNodeAttrValue(HtmlAgilityPack.HtmlDocument document, string xPath, string attr, string defaultValue = "") { var hnc = document.DocumentNode.SelectNodes(xPath); if (hnc == null) @@ -351,5 +402,82 @@ namespace BookmarkManager.Services return defaultValue; } + //private List FindNode_AtrributeContains(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string findValue) + //{ + // var response = new List(); + + // var xPath = $"//{nodeName}[@{attrName}]"; + // var hnc = document.DocumentNode.SelectNodes(xPath); + // if (hnc == null) + // { + // return response; + // } + + // if (hnc.Count <= 0) + // { + // return response; + // } + + // foreach (HtmlNode item in hnc) + // { + // if (!item.Attributes.Contains(attrName)) + // { + // continue; + // } + + // if (!ContainsWord(item.Attributes[attrName].Value ?? string.Empty, findValue)) + // { + // continue; + // } + + // response.Add(item); + // } + + // return response; + //} + + private List FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName) + { + var xPath = (string.IsNullOrWhiteSpace(attrName) ? $"//{nodeName}" : $"//{nodeName}[@{attrName}]"); + var hnc = document.DocumentNode.SelectNodes(xPath); + if (hnc == null) + { + return new List(); + } + + if (hnc.Count <= 0) + { + return new List(); + } + + return hnc.ToList(); + } + + private bool ContainsWord(string haystack, string needle) + { + haystack = haystack?.Trim() ?? string.Empty; + + if (!haystack.Contains(" ")) + { + return haystack.Equals(needle, StringComparison.CurrentCultureIgnoreCase); + } + + foreach (var item in haystack.Split(" ")) + { + if (string.IsNullOrWhiteSpace(item)) + { + continue; + } + + if (item.Equals(needle, StringComparison.CurrentCultureIgnoreCase)) + { + return true; + } + } + + return false; + } + + } } \ No newline at end of file diff --git a/UpdateIconsForm.cs b/UpdateIconsForm.cs index 91477cc..db85498 100644 --- a/UpdateIconsForm.cs +++ b/UpdateIconsForm.cs @@ -360,7 +360,7 @@ namespace FizzyLauncher try { - var image = await _webProvider.RetrieveImage(document); + var image = await _webProvider.RetrieveImage(item.Value.Address, document); if (image != null) { if (image.Width > 16) diff --git a/build-installer.iss b/build-installer.iss index ce48110..341125e 100644 --- a/build-installer.iss +++ b/build-installer.iss @@ -2,7 +2,7 @@ ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! #define MyAppName "BukkuBuddy Bookmark Manager" -#define MyAppVersion "0.6.0.716" +#define MyAppVersion "0.6.2.092" #define MyAppPublisher "Hi, I'm Ray" #define MyAppURL "https://www.hiimray.co.uk/software-bookmark-manager" #define MyAppExeName "bukkubuddy.exe"