Refactored WebProvider for clarity

This commit is contained in:
Ray 2024-09-15 12:48:31 +01:00
parent 5f2aeb78bf
commit b4f236266a
2 changed files with 103 additions and 159 deletions

View File

@ -408,7 +408,7 @@ namespace FizzyLauncher
if (updateDescription) if (updateDescription)
{ {
textBox3.Text = _webProvider.ParseMetaDescription(document); textBox3.Text = _webProvider.ParseDescription(document);
} }
if (updateIcon) if (updateIcon)

View File

@ -3,7 +3,6 @@ using System.Collections.Generic;
using System.Drawing; using System.Drawing;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
using System.Security.Policy;
using System.Threading.Tasks; using System.Threading.Tasks;
using HtmlAgilityPack; using HtmlAgilityPack;
using RyzStudio.Net; using RyzStudio.Net;
@ -178,168 +177,102 @@ namespace BookmarkManager.Services
{ {
string result = null; string result = null;
result = ParseTagValue(document, "//title", string.Empty)?.Trim(); // Find basic title
result = FindNodeValue(document, "//title", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result)) if (!string.IsNullOrWhiteSpace(result))
{ {
return result; return result;
} }
result = FindNodeAttrValue(document, "//meta[@property='og:title']", "content", string.Empty)?.Trim(); // Find title from extended meta
if (!string.IsNullOrWhiteSpace(result)) var patternList = new List<string>()
{ {
return result; "//meta[@property='og:title']",
"//meta[@property='og:site_name']",
"//meta[@name='twitter:title']",
"//meta[@itemprop='name']"
};
foreach (var item in patternList)
{
result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim();
if (string.IsNullOrWhiteSpace(result))
{
continue;
} }
result = FindNodeAttrValue(document, "//meta[@name='twitter:title']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{
return result;
}
result = FindNodeAttrValue(document, "//meta[@property='og:site_name']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{
return result;
}
result = FindNodeAttrValue(document, "//meta[@itemprop='name']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{
return result; return result;
} }
return string.Empty; return string.Empty;
} }
public string ParseMetaDescription(HtmlAgilityPack.HtmlDocument document) public string ParseDescription(HtmlAgilityPack.HtmlDocument document)
{ {
string result = null; var patternList = new List<string>()
{
"//meta[@name='description']",
"//meta[@property='og:description']",
"//meta[@name='twitter:description']",
"//meta[@itemprop='description']",
};
result = FindNodeAttrValue(document, "//meta[@name='description']", "content", string.Empty)?.Trim(); foreach (var item in patternList)
if (!string.IsNullOrWhiteSpace(result))
{ {
return result; var result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim();
} if (string.IsNullOrWhiteSpace(result))
result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{ {
return result; continue;
}
result = FindNodeAttrValue(document, "//meta[@name='twitter:description']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{
return result;
}
result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{
return result;
}
result = FindNodeAttrValue(document, "//meta[@itemprop='description']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{
return result;
} }
return result; return result;
} }
return string.Empty;
}
public string ParseFavicon(HtmlAgilityPack.HtmlDocument document) public string ParseFavicon(HtmlAgilityPack.HtmlDocument document)
{ {
string result = null; string result = null;
//var tt1 = FindNode_AtrributeContains(document, "//link[contains(@rel, 'icon')]", "href", string.Empty); // Find link-rel that contains word
//var tt1 = FindNode_AtrributeContains(document, "link", "rel", "icon"); result = FindNodeAttrValue_ContainsWord(document, "link", "rel", "href", "icon");
if (!string.IsNullOrWhiteSpace(result))
// Find link-rel contains "icon"
var linkNodes = FindNode(document, "link", "rel");
foreach (var item in linkNodes)
{ {
var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty; return result;
if (!ContainsWord(relValue, "icon"))
{
continue;
}
var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(hrefValue))
{
continue;
}
return System.Web.HttpUtility.HtmlDecode(hrefValue);
} }
// Find link-rel contains apple-icon // Find link-rel contains apple-icon
var appleIconPatterns = new List<string>() { "apple-touch-icon", "apple-touch-icon-precomposed" }; var matchPatterns = new List<string>() { "apple-touch-icon", "apple-touch-icon-precomposed" };
result = FindNodeAttrValue_Equals(document, "link", "rel", "href", matchPatterns);
foreach (var item in linkNodes) if (!string.IsNullOrWhiteSpace(result))
{ {
var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty; return result;
if (!appleIconPatterns.Contains(relValue?.ToLower() ?? string.Empty)) }
// Find favicon from extended meta
var patternList = new List<string>()
{
"//meta[@property='og:image']",
"//meta[@name='twitter:image']",
"//meta[@itemprop='image']"
};
foreach (var item in patternList)
{
result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim();
if (string.IsNullOrWhiteSpace(result))
{ {
continue; continue;
} }
var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(hrefValue))
{
continue;
}
return System.Web.HttpUtility.HtmlDecode(hrefValue);
}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'shortcut icon']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'icon']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon-precomposed']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
result = FindNodeAttrValue(document, "//meta[@property='og:image']", "content", string.Empty)?.Trim();
if (!string.IsNullOrWhiteSpace(result))
{
return result; return result;
} }
result = FindNodeAttrValue(document, "//meta[@name='twitter:image']", "content", string.Empty)?.Trim(); return string.Empty;
if (!string.IsNullOrWhiteSpace(result))
{
return result;
} }
result = FindNodeAttrValue(document, "//meta[@itemprop='image']", "content", string.Empty)?.Trim(); private string FindNodeValue(HtmlAgilityPack.HtmlDocument document, string xPath, string defaultValue = "")
if (!string.IsNullOrWhiteSpace(result))
{
return result;
}
return "/favicon.ico";
}
private string ParseTagValue(HtmlAgilityPack.HtmlDocument document, string xPath, string defaultValue = "")
{ {
var hnc = document.DocumentNode.SelectNodes(xPath); var hnc = document.DocumentNode.SelectNodes(xPath);
if (hnc == null) if (hnc == null)
@ -402,40 +335,6 @@ namespace BookmarkManager.Services
return defaultValue; return defaultValue;
} }
//private List<HtmlNode> FindNode_AtrributeContains(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string findValue)
//{
// var response = new List<HtmlNode>();
// var xPath = $"//{nodeName}[@{attrName}]";
// var hnc = document.DocumentNode.SelectNodes(xPath);
// if (hnc == null)
// {
// return response;
// }
// if (hnc.Count <= 0)
// {
// return response;
// }
// foreach (HtmlNode item in hnc)
// {
// if (!item.Attributes.Contains(attrName))
// {
// continue;
// }
// if (!ContainsWord(item.Attributes[attrName].Value ?? string.Empty, findValue))
// {
// continue;
// }
// response.Add(item);
// }
// return response;
//}
private List<HtmlNode> FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName) private List<HtmlNode> FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName)
{ {
var xPath = (string.IsNullOrWhiteSpace(attrName) ? $"//{nodeName}" : $"//{nodeName}[@{attrName}]"); var xPath = (string.IsNullOrWhiteSpace(attrName) ? $"//{nodeName}" : $"//{nodeName}[@{attrName}]");
@ -453,6 +352,52 @@ namespace BookmarkManager.Services
return hnc.ToList(); return hnc.ToList();
} }
private string FindNodeAttrValue_ContainsWord(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, string matchEqualList, string defaultValue = "")
{
var linkNodes = FindNode(document, nodeName, attrName);
foreach (var item in linkNodes)
{
var relValue = item.Attributes[attrName].Value?.Trim() ?? string.Empty;
if (!ContainsWord(relValue, matchEqualList))
{
continue;
}
var hrefValue = item.Attributes[returnAttrName].Value?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(hrefValue))
{
continue;
}
return System.Web.HttpUtility.HtmlDecode(hrefValue);
}
return defaultValue;
}
private string FindNodeAttrValue_Equals(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, List<string> matchValueList, string defaultValue = "")
{
var linkNodes = FindNode(document, nodeName, attrName);
foreach (var item in linkNodes)
{
var relValue = item.Attributes[attrName].Value?.Trim() ?? string.Empty;
if (!matchValueList.Contains(relValue?.ToLower() ?? string.Empty))
{
continue;
}
var hrefValue = item.Attributes[returnAttrName].Value?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(hrefValue))
{
continue;
}
return System.Web.HttpUtility.HtmlDecode(hrefValue);
}
return defaultValue;
}
private bool ContainsWord(string haystack, string needle) private bool ContainsWord(string haystack, string needle)
{ {
haystack = haystack?.Trim() ?? string.Empty; haystack = haystack?.Trim() ?? string.Empty;
@ -478,6 +423,5 @@ namespace BookmarkManager.Services
return false; return false;
} }
} }
} }