@ -1,6 +1,9 @@
using System ;
using System.Collections.Generic ;
using System.Drawing ;
using System.Linq ;
using System.Net ;
using System.Security.Policy ;
using System.Threading.Tasks ;
using HtmlAgilityPack ;
using RyzStudio.Net ;
@ -148,7 +151,7 @@ namespace BookmarkManager.Services
return result ;
}
public async Task < Image > RetrieveImage ( HtmlAgilityPack . HtmlDocument document )
public async Task < Image > RetrieveImage ( string url , HtmlAgilityPack . HtmlDocument document )
{
var iconUrl = this . ParseFavicon ( document ) ;
if ( string . IsNullOrWhiteSpace ( iconUrl ) )
@ -156,6 +159,18 @@ namespace BookmarkManager.Services
return null ;
}
try
{
var baseUri = new Uri ( url ) ;
var absoluteUri = new Uri ( baseUri , iconUrl ) ;
iconUrl = absoluteUri . AbsoluteUri ;
}
catch
{
return null ;
}
return await this . RetrieveImage ( iconUrl ) ;
}
@ -169,25 +184,25 @@ namespace BookmarkManager.Services
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@property='og:title']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@property='og:title']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@name='twitter:title']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@name='twitter:title']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@property='og:site_name']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@property='og:site_name']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@itemprop='name']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@itemprop='name']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
@ -200,31 +215,31 @@ namespace BookmarkManager.Services
{
string result = null ;
result = ParseTagValue_Attr ( document , "//meta[@name='description']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@name='description']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@property='og:description']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@property='og:description']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@name='twitter:description']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@name='twitter:description']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@property='og:description']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@property='og:description']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[@itemprop='description']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@itemprop='description']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
@ -237,49 +252,85 @@ namespace BookmarkManager.Services
{
string result = null ;
result = ParseTagValue_Attr ( document , "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'shortcut icon']" , "href" , string . Empty ) ? . Trim ( ) ;
//var tt1 = FindNode_AtrributeContains(document, "//link[contains(@rel, 'icon')]", "href", string.Empty);
//var tt1 = FindNode_AtrributeContains(document, "link", "rel", "icon");
// Find link-rel contains "icon"
var linkNodes = FindNode ( document , "link" , "rel" ) ;
foreach ( var item in linkNodes )
{
var relValue = item . Attributes [ "rel" ] . Value ? . Trim ( ) ? ? string . Empty ;
if ( ! ContainsWord ( relValue , "icon" ) )
{
continue ;
}
var hrefValue = item . Attributes [ "href" ] . Value ? . Trim ( ) ? ? string . Empty ;
if ( string . IsNullOrWhiteSpace ( hrefValue ) )
{
continue ;
}
return System . Web . HttpUtility . HtmlDecode ( hrefValue ) ;
}
// Find link-rel contains apple-icon
var appleIconPatterns = new List < string > ( ) { "apple-touch-icon" , "apple-touch-icon-precomposed" } ;
foreach ( var item in linkNodes )
{
var relValue = item . Attributes [ "rel" ] . Value ? . Trim ( ) ? ? string . Empty ;
if ( ! appleIconPatterns . Contains ( relValue ? . ToLower ( ) ? ? string . Empty ) )
{
continue ;
}
var hrefValue = item . Attributes [ "href" ] . Value ? . Trim ( ) ? ? string . Empty ;
if ( string . IsNullOrWhiteSpace ( hrefValue ) )
{
continue ;
}
return System . Web . HttpUtility . HtmlDecode ( hrefValue ) ;
}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'shortcut icon']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'icon']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
//result = ParseTagValue_Attr(document, "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon-precomposed']", "href", string.Empty)?.Trim();
//if (!string.IsNullOrWhiteSpace(result))
//{
// return result;
//}
result = FindNodeAttrValue ( document , "//meta[@property='og:image']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'icon']" , "href" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue( document , "//meta[@name='twitter:image']" , "content ", string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon']" , "href" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'apple-touch-icon-precomposed']" , "href" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[translate(@property, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'og:image']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[translate(@name, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'twitter:image']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[translate(@property, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'og:image']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
}
result = ParseTagValue_Attr ( document , "//meta[translate(@itemprop, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz') = 'image']" , "content" , string . Empty ) ? . Trim ( ) ;
result = FindNodeAttrValue ( document , "//meta[@itemprop='image']" , "content" , string . Empty ) ? . Trim ( ) ;
if ( ! string . IsNullOrWhiteSpace ( result ) )
{
return result ;
@ -320,7 +371,7 @@ namespace BookmarkManager.Services
return defaultValue ;
}
private string ParseTagValue_Attr ( HtmlAgilityPack . HtmlDocument document , string xPath , string attr , string defaultValue = "" )
private string FindNodeAttrValue ( HtmlAgilityPack . HtmlDocument document , string xPath , string attr , string defaultValue = "" )
{
var hnc = document . DocumentNode . SelectNodes ( xPath ) ;
if ( hnc = = null )
@ -351,5 +402,82 @@ namespace BookmarkManager.Services
return defaultValue ;
}
//private List<HtmlNode> FindNode_AtrributeContains(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName, string findValue)
//{
// var response = new List<HtmlNode>();
// var xPath = $"//{nodeName}[@{attrName}]";
// var hnc = document.DocumentNode.SelectNodes(xPath);
// if (hnc == null)
// {
// return response;
// }
// if (hnc.Count <= 0)
// {
// return response;
// }
// foreach (HtmlNode item in hnc)
// {
// if (!item.Attributes.Contains(attrName))
// {
// continue;
// }
// if (!ContainsWord(item.Attributes[attrName].Value ?? string.Empty, findValue))
// {
// continue;
// }
// response.Add(item);
// }
// return response;
//}
/// <summary>
/// Collects every element named <paramref name="nodeName"/> in the document,
/// optionally restricted to elements that carry the attribute <paramref name="attrName"/>.
/// </summary>
/// <param name="document">Parsed HTML document to search.</param>
/// <param name="nodeName">Element name used in the XPath query (e.g. "link").</param>
/// <param name="attrName">
/// Attribute the element must have; when null, empty or whitespace the attribute
/// filter is omitted and all elements with the given name are selected.
/// </param>
/// <returns>
/// A new list with the matching nodes; an empty list (never null) when the
/// query yields a null or empty result.
/// </returns>
private List<HtmlNode> FindNode(HtmlAgilityPack.HtmlDocument document, string nodeName, string attrName)
{
    // Build "//name" or "//name[@attr]" depending on whether an attribute filter was requested.
    string query;
    if (string.IsNullOrWhiteSpace(attrName))
    {
        query = $"//{nodeName}";
    }
    else
    {
        query = $"//{nodeName}[@{attrName}]";
    }

    var matches = document.DocumentNode.SelectNodes(query);
    var found = new List<HtmlNode>();

    // The null check guards against SelectNodes handing back no collection at all.
    if (matches != null && matches.Count > 0)
    {
        found.AddRange(matches);
    }

    return found;
}
/// <summary>
/// Determines whether <paramref name="haystack"/> contains <paramref name="needle"/>
/// as a whole whitespace-delimited token, ignoring case
/// (for example the "icon" in <c>rel="shortcut icon"</c>).
/// </summary>
/// <param name="haystack">Token list to search; null is treated as empty.</param>
/// <param name="needle">Token to look for; a null, empty or whitespace needle never matches.</param>
/// <returns>
/// true when one of the tokens equals <paramref name="needle"/> (ordinal, case-insensitive);
/// otherwise false. Partial matches such as "apple-touch-icon" vs "icon" do not count.
/// </returns>
private bool ContainsWord(string haystack, string needle)
{
    if (string.IsNullOrWhiteSpace(needle))
    {
        return false;
    }

    // Token lists in HTML attributes may be separated by any ASCII whitespace
    // (space, tab, LF, FF, CR), not just the space character.
    var separators = new[] { ' ', '\t', '\n', '\f', '\r' };
    var tokens = (haystack ?? string.Empty)
        .Trim()
        .Split(separators, StringSplitOptions.RemoveEmptyEntries);

    foreach (var token in tokens)
    {
        // Ordinal comparison: these are protocol keywords, not linguistic text, so the
        // result must not depend on the current culture (e.g. Turkish dotted/dotless I).
        if (token.Equals(needle, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
}
}