using bzit.bomg.Models;
using HtmlAgilityPack;
using RyzStudio.Net;
using System;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using HtmlDocument = HtmlAgilityPack.HtmlDocument;

namespace bzit.bomg
{
    /// <summary>
    /// A bookmark entry that can refresh its own title, description and favicon
    /// address by downloading and parsing the page at <c>SiteAddress</c>.
    /// </summary>
    public class BookmarkItemModel : BookmarkItemViewModel
    {
        // Fallback icon location used when the page declares no icon of its own.
        private const string DefaultFaviconPath = "/favicon.ico";

        // Edge length, in pixels, of the bitmap returned by RetrieveFavicon.
        private const int FaviconSize = 16;

        // Alphabets fed to the XPath translate() function to lower-case attribute values.
        private const string UpperAlphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        private const string LowerAlphabet = "abcdefghijklmnopqrstuvwxyz";

        /// <summary>HTTP helper used to download the page source; created on first use.</summary>
        protected HttpWeb webClient = null;

        /// <summary>Creates an empty bookmark with every text field set to an empty string.</summary>
        public BookmarkItemModel()
        {
            this.Clear();
        }

        /// <summary>Resets name, address, description, favicon address and tree path to empty strings.</summary>
        public void Clear()
        {
            this.SiteName = string.Empty;
            this.SiteAddress = string.Empty;
            this.SiteDescription = string.Empty;
            this.FaviconAddress = string.Empty;
            this.TreeviewPath = string.Empty;
        }

        /// <summary>
        /// Builds a three-line "Name / Address / Description" summary of the bookmark.
        /// </summary>
        /// <remarks>
        /// Declared with <c>new</c>, so it hides the inherited <c>ToString</c> instead of
        /// overriding it; it is only selected when called through a
        /// <see cref="BookmarkItemModel"/>-typed reference.
        /// </remarks>
        /// <returns>The summary text, each line terminated by <see cref="Environment.NewLine"/>.</returns>
        public new string ToString()
        {
            StringBuilder sb = new StringBuilder();
            sb.Append("Name = ").AppendLine(this.SiteName ?? string.Empty);
            sb.Append("Address = ").AppendLine(this.SiteAddress ?? string.Empty);
            sb.Append("Description = ").AppendLine(this.SiteDescription ?? string.Empty);

            return sb.ToString();
        }

        /// <summary>Copies the bookmark fields into a plain <see cref="BookmarkItemViewModel"/>.</summary>
        /// <returns>A new view model holding the same five field values.</returns>
        public BookmarkItemViewModel ToViewModel()
        {
            return new BookmarkItemViewModel()
            {
                SiteName = this.SiteName,
                SiteAddress = this.SiteAddress,
                SiteDescription = this.SiteDescription,
                FaviconAddress = this.FaviconAddress,
                TreeviewPath = this.TreeviewPath
            };
        }

        /// <summary>
        /// Downloads the page at <c>SiteAddress</c> and refreshes the name, description
        /// and favicon address from its markup.
        /// </summary>
        /// <returns><c>false</c> when no page source could be retrieved (nothing is changed); otherwise <c>true</c>.</returns>
        public bool Update()
        {
            HtmlDocument document = loadDocument();
            if (document == null)
            {
                return false;
            }

            this.SiteName = parseSiteTitle(document);
            this.SiteDescription = parseSiteDescription(document);
            this.FaviconAddress = resolveFaviconAddress(document);

            return true;
        }

        /// <summary>
        /// Downloads the page at <c>SiteAddress</c> and refreshes only the favicon address.
        /// </summary>
        /// <returns><c>false</c> when no page source could be retrieved (nothing is changed); otherwise <c>true</c>.</returns>
        public bool UpdateFavicon()
        {
            HtmlDocument document = loadDocument();
            if (document == null)
            {
                return false;
            }

            this.FaviconAddress = resolveFaviconAddress(document);

            return true;
        }

        /// <summary>
        /// Downloads the icon at <c>FaviconAddress</c> and returns it scaled to 16x16.
        /// </summary>
        /// <returns>
        /// The scaled icon, or <c>null</c> when there is no favicon address or the download
        /// or decoding fails. On failure <c>FaviconAddress</c> is reset to <c>null</c>.
        /// </returns>
        public Bitmap RetrieveFavicon()
        {
            byte[] ignored;
            return RetrieveFavicon(out ignored);
        }

        /// <summary>
        /// Downloads the icon at <c>FaviconAddress</c>, returning both the 16x16 bitmap
        /// and the raw bytes that were downloaded.
        /// </summary>
        /// <param name="rawData">Receives the downloaded bytes on success; <c>null</c> whenever the method returns <c>null</c>.</param>
        /// <returns>
        /// The scaled icon, or <c>null</c> when there is no favicon address or the download
        /// or decoding fails. On failure <c>FaviconAddress</c> is reset to <c>null</c>.
        /// </returns>
        public Bitmap RetrieveFavicon(out byte[] rawData)
        {
            rawData = null;

            if (string.IsNullOrWhiteSpace(this.FaviconAddress))
            {
                return null;
            }

            try
            {
                byte[] iconData;
                using (WebClient downloader = new WebClient())
                {
                    downloader.CachePolicy = new System.Net.Cache.RequestCachePolicy(System.Net.Cache.RequestCacheLevel.NoCacheNoStore);
                    iconData = downloader.DownloadData(this.FaviconAddress);
                }

                if (!RyzStudio.IO.FileType.IsImage(iconData))
                {
                    // Not a supported image: same outcome as any other retrieval failure.
                    this.FaviconAddress = null;
                    return null;
                }

                Bitmap icon;
                using (MemoryStream stream = new MemoryStream(iconData))
                using (Image source = Image.FromStream(stream))
                {
                    // The Bitmap(Image, int, int) constructor draws a scaled copy, so the
                    // source image and its backing stream can be released afterwards.
                    icon = new Bitmap(source, FaviconSize, FaviconSize);
                }

                // Publish the raw bytes only once decoding has succeeded.
                rawData = iconData;
                return icon;
            }
            catch
            {
                // Best effort: any download/decoding error invalidates the stored address.
                this.FaviconAddress = null;
                return null;
            }
        }

        /// <summary>
        /// Requests <c>SiteAddress</c> through <see cref="webClient"/>, creating the helper on first use.
        /// </summary>
        /// <returns>The response text when the reported status code is 200, 301 or 302; otherwise <c>null</c>.</returns>
        protected string retrieveSourceCode()
        {
            if (webClient == null)
            {
                webClient = new HttpWeb();
            }

            string sourceCode;
            int statusCode = webClient.GetResponse(out sourceCode, this.SiteAddress);

            switch (statusCode)
            {
                case 200:
                case 301:
                case 302:
                    return sourceCode;
                default:
                    return null;
            }
        }

        /// <summary>
        /// Returns the decoded inner HTML of the first node matching <paramref name="xpath"/>
        /// that has non-blank content.
        /// </summary>
        /// <param name="doc">Parsed document to search.</param>
        /// <param name="xpath">XPath expression selecting candidate nodes.</param>
        /// <param name="defaultValue">Value returned when no candidate yields text.</param>
        /// <returns>The HTML-decoded, trimmed text with carriage returns removed and line feeds replaced by spaces, or <paramref name="defaultValue"/>.</returns>
        protected string parseTagValue(HtmlDocument doc, string xpath, string defaultValue = "")
        {
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                return defaultValue;
            }

            foreach (HtmlNode node in nodes)
            {
                if (string.IsNullOrWhiteSpace(node.InnerHtml))
                {
                    continue;
                }

                string text = WebUtility.HtmlDecode(node.InnerHtml)?.Replace("\r", "")?.Replace("\n", " ")?.Trim();
                if (!string.IsNullOrWhiteSpace(text))
                {
                    return text;
                }
            }

            return defaultValue;
        }

        /// <summary>
        /// Returns the decoded value of attribute <paramref name="attr"/> from the first node
        /// matching <paramref name="xpath"/> that carries a non-blank value for it.
        /// </summary>
        /// <param name="doc">Parsed document to search.</param>
        /// <param name="xpath">XPath expression selecting candidate nodes.</param>
        /// <param name="attr">Name of the attribute to read.</param>
        /// <param name="defaultValue">Value returned when no candidate has the attribute set.</param>
        /// <returns>The trimmed, HTML-decoded attribute value, or <paramref name="defaultValue"/>.</returns>
        protected string parseTagValue_Attr(HtmlDocument doc, string xpath, string attr, string defaultValue = "")
        {
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                return defaultValue;
            }

            foreach (HtmlNode node in nodes)
            {
                HtmlAttribute attribute = node.Attributes[attr];
                if ((attribute == null) || string.IsNullOrWhiteSpace(attribute.Value))
                {
                    continue;
                }

                // Same decoder as parseTagValue, so both helpers treat entities alike.
                return WebUtility.HtmlDecode(attribute.Value.Trim());
            }

            return defaultValue;
        }

        /// <summary>
        /// Finds the page title: the <c>title</c> element first, then the og:title,
        /// twitter:title, og:site_name and itemprop=name meta tags, in that order.
        /// </summary>
        /// <param name="doc">Parsed document to search.</param>
        /// <returns>The trimmed title, or an empty string when none is present.</returns>
        protected string parseSiteTitle(HtmlDocument doc)
        {
            string title = parseTagValue(doc, "//title", string.Empty);
            if (string.IsNullOrWhiteSpace(title))
            {
                title = firstAttributeValue(
                    doc,
                    "content",
                    "//meta[@property='og:title']",
                    "//meta[@name='twitter:title']",
                    "//meta[@property='og:site_name']",
                    "//meta[@itemprop='name']");
            }

            return title?.Trim();
        }

        /// <summary>
        /// Finds the page description from the description, og:description,
        /// twitter:description and itemprop=description meta tags, in that order.
        /// </summary>
        /// <param name="doc">Parsed document to search.</param>
        /// <returns>The description, or an empty string when none is present.</returns>
        protected string parseSiteDescription(HtmlDocument doc)
        {
            return firstAttributeValue(
                doc,
                "content",
                "//meta[@name='description']",
                "//meta[@property='og:description']",
                "//meta[@name='twitter:description']",
                "//meta[@itemprop='description']");
        }

        /// <summary>
        /// Finds the icon address declared by the page. <c>link</c> elements are tried first
        /// (shortcut icon, icon, apple-touch-icon, apple-touch-icon-precomposed), then
        /// image meta tags (og:image, twitter:image, itemprop=image). Attribute values
        /// are matched case-insensitively for A-Z.
        /// </summary>
        /// <param name="doc">Parsed document to search.</param>
        /// <returns>The icon address exactly as written in the page, or an empty string when none is declared.</returns>
        protected string parseSiteIcon(HtmlDocument doc)
        {
            string icon = firstAttributeValue(
                doc,
                "href",
                lowerCaseMatch("link", "rel", "shortcut icon"),
                lowerCaseMatch("link", "rel", "icon"),
                lowerCaseMatch("link", "rel", "apple-touch-icon"),
                lowerCaseMatch("link", "rel", "apple-touch-icon-precomposed"));

            if (string.IsNullOrWhiteSpace(icon))
            {
                icon = firstAttributeValue(
                    doc,
                    "content",
                    lowerCaseMatch("meta", "property", "og:image"),
                    lowerCaseMatch("meta", "name", "twitter:image"),
                    lowerCaseMatch("meta", "itemprop", "image"));
            }

            return icon;
        }

        // Downloads the page source and parses it; returns null when no source is available.
        private HtmlDocument loadDocument()
        {
            string sourceCode = retrieveSourceCode();
            if (string.IsNullOrWhiteSpace(sourceCode))
            {
                return null;
            }

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(sourceCode);

            return document;
        }

        // Picks the page's icon address (or the inferred default) and resolves it against SiteAddress.
        private string resolveFaviconAddress(HtmlDocument document)
        {
            string address = parseSiteIcon(document);
            if (string.IsNullOrWhiteSpace(address))
            {
                // default inferred icon
                address = DefaultFaviconPath;
            }

            // TryCreate instead of the Uri constructor: a missing or non-absolute site
            // address leaves the icon address unresolved rather than throwing.
            Uri siteUri;
            Uri iconUri;
            if (Uri.TryCreate(this.SiteAddress, UriKind.Absolute, out siteUri)
                && Uri.TryCreate(siteUri, address, out iconUri))
            {
                return iconUri.ToString();
            }

            return address;
        }

        // Returns the first non-blank value of attr across the xpaths, tried in order; empty string if none.
        private string firstAttributeValue(HtmlDocument doc, string attr, params string[] xpaths)
        {
            foreach (string xpath in xpaths)
            {
                string value = parseTagValue_Attr(doc, xpath, attr, string.Empty);
                if (!string.IsNullOrWhiteSpace(value))
                {
                    return value;
                }
            }

            return string.Empty;
        }

        // Builds an XPath selecting <element> nodes whose attribute, lower-cased via translate(), equals value.
        private static string lowerCaseMatch(string element, string attribute, string value)
        {
            return string.Format(
                "//{0}[translate(@{1}, '{2}', '{3}') = '{4}']",
                element,
                attribute,
                UpperAlphabet,
                LowerAlphabet,
                value);
        }
    }
}