release/0.6.2 #5
@ -408,7 +408,7 @@ namespace FizzyLauncher
|
|||||||
|
|
||||||
if (updateDescription)
|
if (updateDescription)
|
||||||
{
|
{
|
||||||
textBox3.Text = _webProvider.ParseMetaDescription(document);
|
textBox3.Text = _webProvider.ParseDescription(document);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (updateIcon)
|
if (updateIcon)
|
||||||
|
@ -3,7 +3,6 @@ using System.Collections.Generic;
|
|||||||
using System.Drawing;
|
using System.Drawing;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Net;
|
using System.Net;
|
||||||
using System.Security.Policy;
|
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
using HtmlAgilityPack;
|
using HtmlAgilityPack;
|
||||||
using RyzStudio.Net;
|
using RyzStudio.Net;
|
||||||
@ -178,168 +177,102 @@ namespace BookmarkManager.Services
|
|||||||
{
|
{
|
||||||
string result = null;
|
string result = null;
|
||||||
|
|
||||||
result = ParseTagValue(document, "//title", string.Empty)?.Trim();
|
// Find basic title
|
||||||
|
result = FindNodeValue(document, "//title", string.Empty)?.Trim();
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
if (!string.IsNullOrWhiteSpace(result))
|
||||||
{
|
{
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@property='og:title']", "content", string.Empty)?.Trim();
|
// Find title from extended meta
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
var patternList = new List<string>()
|
||||||
{
|
{
|
||||||
return result;
|
"//meta[@property='og:title']",
|
||||||
}
|
"//meta[@property='og:site_name']",
|
||||||
|
"//meta[@name='twitter:title']",
|
||||||
|
"//meta[@itemprop='name']"
|
||||||
|
};
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@name='twitter:title']", "content", string.Empty)?.Trim();
|
foreach (var item in patternList)
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
{
|
||||||
return result;
|
result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim();
|
||||||
}
|
if (string.IsNullOrWhiteSpace(result))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@property='og:site_name']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@itemprop='name']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
return string.Empty;
|
return string.Empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
public string ParseMetaDescription(HtmlAgilityPack.HtmlDocument document)
|
public string ParseDescription(HtmlAgilityPack.HtmlDocument document)
|
||||||
{
|
{
|
||||||
string result = null;
|
var patternList = new List<string>()
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@name='description']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
{
|
||||||
|
"//meta[@name='description']",
|
||||||
|
"//meta[@property='og:description']",
|
||||||
|
"//meta[@name='twitter:description']",
|
||||||
|
"//meta[@itemprop='description']",
|
||||||
|
};
|
||||||
|
|
||||||
|
foreach (var item in patternList)
|
||||||
|
{
|
||||||
|
var result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim();
|
||||||
|
if (string.IsNullOrWhiteSpace(result))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim();
|
return string.Empty;
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@name='twitter:description']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@property='og:description']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@itemprop='description']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public string ParseFavicon(HtmlAgilityPack.HtmlDocument document)
|
public string ParseFavicon(HtmlAgilityPack.HtmlDocument document)
|
||||||
{
|
{
|
||||||
string result = null;
|
string result = null;
|
||||||
|
|
||||||
//var tt1 = FindNode_AtrributeContains(document, "//link[contains(@rel, 'icon')]", "href", string.Empty);
|
// Find link-rel that contains word
|
||||||
//var tt1 = FindNode_AtrributeContains(document, "link", "rel", "icon");
|
result = FindNodeAttrValue_ContainsWord(document, "link", "rel", "href", "icon");
|
||||||
|
if (!string.IsNullOrWhiteSpace(result))
|
||||||
// Find link-rel contains "icon"
|
|
||||||
var linkNodes = FindNode(document, "link", "rel");
|
|
||||||
foreach (var item in linkNodes)
|
|
||||||
{
|
{
|
||||||
var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty;
|
return result;
|
||||||
if (!ContainsWord(relValue, "icon"))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty;
|
|
||||||
if (string.IsNullOrWhiteSpace(hrefValue))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
return System.Web.HttpUtility.HtmlDecode(hrefValue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find link-rel contains apple-icon
|
// Find link-rel contains apple-icon
|
||||||
var appleIconPatterns = new List<string>() { "apple-touch-icon", "apple-touch-icon-precomposed" };
|
var matchPatterns = new List<string>() { "apple-touch-icon", "apple-touch-icon-precomposed" };
|
||||||
|
result = FindNodeAttrValue_Equals(document, "link", "rel", "href", matchPatterns);
|
||||||
foreach (var item in linkNodes)
|
if (!string.IsNullOrWhiteSpace(result))
|
||||||
{
|
{
|
||||||
var relValue = item.Attributes["rel"].Value?.Trim() ?? string.Empty;
|
return result;
|
||||||
if (!appleIconPatterns.Contains(relValue?.ToLower() ?? string.Empty))
|
}
|
||||||
|
|
||||||
|
// Find favicon from extended meta
|
||||||
|
var patternList = new List<string>()
|
||||||
|
{
|
||||||
|
"//meta[@property='og:image']",
|
||||||
|
"//meta[@name='twitter:image']",
|
||||||
|
"//meta[@itemprop='image']"
|
||||||
|
};
|
||||||
|
|
||||||
|
foreach (var item in patternList)
|
||||||
|
{
|
||||||
|
result = FindNodeAttrValue(document, item, "content", string.Empty)?.Trim();
|
||||||
|
if (string.IsNullOrWhiteSpace(result))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
var hrefValue = item.Attributes["href"].Value?.Trim() ?? string.Empty;
|
|
||||||
if (string.IsNullOrWhiteSpace(hrefValue))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
return System.Web.HttpUtility.HtmlDecode(hrefValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'shortcut icon']", "href", string.Empty)?.Trim();
|
|
||||||
//if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
//{
|
|
||||||
// return result;
|
|
||||||
//}
|
|
||||||
|
|
||||||
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'icon']", "href", string.Empty)?.Trim();
|
|
||||||
//if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
//{
|
|
||||||
// return result;
|
|
||||||
//}
|
|
||||||
|
|
||||||
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon']", "href", string.Empty)?.Trim();
|
|
||||||
//if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
//{
|
|
||||||
// return result;
|
|
||||||
//}
|
|
||||||
|
|
||||||
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon-precomposed']", "href", string.Empty)?.Trim();
|
|
||||||
//if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
//{
|
|
||||||
// return result;
|
|
||||||
//}
|
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@property='og:image']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@name='twitter:image']", "content", string.Empty)?.Trim();
|
return string.Empty;
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = FindNodeAttrValue(document, "//meta[@itemprop='image']", "content", string.Empty)?.Trim();
|
|
||||||
if (!string.IsNullOrWhiteSpace(result))
|
|
||||||
{
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
return "/favicon.ico";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private string ParseTagValue(HtmlAgilityPack.HtmlDocument document, string xPath, string defaultValue = "")
|
private string FindNodeValue(HtmlAgilityPack.HtmlDocument document, string xPath, string defaultValue = "")
|
||||||
{
|
{
|
||||||
var hnc = document.DocumentNode.SelectNodes(xPath);
|
var hnc = document.DocumentNode.SelectNodes(xPath);
|
||||||
if (hnc == null)
|
if (hnc == null)
|
||||||
@ -402,40 +335,6 @@ namespace BookmarkManager.Services
|
|||||||
return defaultValue;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
//private List<HtmlNode> FindNode_AtrributeContains(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string findValue)
|
|
||||||
//{
|
|
||||||
// var response = new List<HtmlNode>();
|
|
||||||
|
|
||||||
// var xPath = $"//{nodeName}[@{attrName}]";
|
|
||||||
// var hnc = document.DocumentNode.SelectNodes(xPath);
|
|
||||||
// if (hnc == null)
|
|
||||||
// {
|
|
||||||
// return response;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (hnc.Count <= 0)
|
|
||||||
// {
|
|
||||||
// return response;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// foreach (HtmlNode item in hnc)
|
|
||||||
// {
|
|
||||||
// if (!item.Attributes.Contains(attrName))
|
|
||||||
// {
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (!ContainsWord(item.Attributes[attrName].Value ?? string.Empty, findValue))
|
|
||||||
// {
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// response.Add(item);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// return response;
|
|
||||||
//}
|
|
||||||
|
|
||||||
private List<HtmlNode> FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName)
|
private List<HtmlNode> FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName)
|
||||||
{
|
{
|
||||||
var xPath = (string.IsNullOrWhiteSpace(attrName) ? $"//{nodeName}" : $"//{nodeName}[@{attrName}]");
|
var xPath = (string.IsNullOrWhiteSpace(attrName) ? $"//{nodeName}" : $"//{nodeName}[@{attrName}]");
|
||||||
@ -453,6 +352,52 @@ namespace BookmarkManager.Services
|
|||||||
return hnc.ToList();
|
return hnc.ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private string FindNodeAttrValue_ContainsWord(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, string matchEqualList, string defaultValue = "")
|
||||||
|
{
|
||||||
|
var linkNodes = FindNode(document, nodeName, attrName);
|
||||||
|
foreach (var item in linkNodes)
|
||||||
|
{
|
||||||
|
var relValue = item.Attributes[attrName].Value?.Trim() ?? string.Empty;
|
||||||
|
if (!ContainsWord(relValue, matchEqualList))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var hrefValue = item.Attributes[returnAttrName].Value?.Trim() ?? string.Empty;
|
||||||
|
if (string.IsNullOrWhiteSpace(hrefValue))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return System.Web.HttpUtility.HtmlDecode(hrefValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
private string FindNodeAttrValue_Equals(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string returnAttrName, List<string> matchValueList, string defaultValue = "")
|
||||||
|
{
|
||||||
|
var linkNodes = FindNode(document, nodeName, attrName);
|
||||||
|
foreach (var item in linkNodes)
|
||||||
|
{
|
||||||
|
var relValue = item.Attributes[attrName].Value?.Trim() ?? string.Empty;
|
||||||
|
if (!matchValueList.Contains(relValue?.ToLower() ?? string.Empty))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var hrefValue = item.Attributes[returnAttrName].Value?.Trim() ?? string.Empty;
|
||||||
|
if (string.IsNullOrWhiteSpace(hrefValue))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
return System.Web.HttpUtility.HtmlDecode(hrefValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
private bool ContainsWord(string haystack, string needle)
|
private bool ContainsWord(string haystack, string needle)
|
||||||
{
|
{
|
||||||
haystack = haystack?.Trim() ?? string.Empty;
|
haystack = haystack?.Trim() ?? string.Empty;
|
||||||
@ -478,6 +423,5 @@ namespace BookmarkManager.Services
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user