Skip to content

Commit

Permalink
Add website name and icon url for output
Browse files Browse the repository at this point in the history
  • Loading branch information
chenyunchen committed Dec 3, 2018
1 parent 3638037 commit 42a7d35
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,22 @@
fmt.Println(err)
return
}
fmt.Printf("Icon : %s\n", s.Preview.Icon)
fmt.Printf("Name : %s\n", s.Preview.Name)
fmt.Printf("Title : %s\n", s.Preview.Title)
fmt.Printf("Description : %s\n", s.Preview.Description)
fmt.Printf("Image: %s\n", s.Preview.Images[0])
fmt.Printf("Url : %s\n", s.Preview.Link)
}

output:
output:

**Icon :** https://www.w3.org/favicon.ico
**Name :** www.w3.org
**Title :** World Wide Web Consortium (W3C)
**Description :** The World Wide Web Consortium (W3C) is an international community where Member organizations, a full-time staff, and the public work together to develop Web standards.
**Image:** https://www.w3.org/2008/site/images/logo-w3c-mobile-lg
**Url :** https://www.w3.org/
**Url :** https://www.w3.org/


## License
Expand Down
15 changes: 15 additions & 0 deletions goscraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type Document struct {
}

type DocumentPreview struct {
Icon string
Name string
Title string
Description string
Images []string
Expand Down Expand Up @@ -165,6 +167,10 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
doc.Preview.Images = []string{}
// save the preview's link in case <link rel="canonical"> is found after <meta property="og:url">
link := doc.Preview.Link
// default the site name to the URL host; overridden if <meta property="og:site_name"> is found
doc.Preview.Name = scraper.Url.Host
// default the icon to /favicon.ico at the site root; overridden if a <link rel="icon"> with an href is found
doc.Preview.Icon = fmt.Sprintf("%s://%s%s", scraper.Url.Scheme, scraper.Url.Host, "/favicon.ico")
for {
tokenType := t.Next()
if tokenType == html.ErrorToken {
Expand All @@ -185,11 +191,15 @@ func (scraper *Scraper) parseDocument(doc *Document) error {

case "link":
var canonical bool
var hasIcon bool
var href string
for _, attr := range token.Attr {
if cleanStr(attr.Key) == "rel" && cleanStr(attr.Val) == "canonical" {
canonical = true
}
if cleanStr(attr.Key) == "rel" && cleanStr(attr.Val) == "icon" {
hasIcon = true
}
if cleanStr(attr.Key) == "href" {
href = attr.Val
}
Expand All @@ -201,6 +211,9 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
return err
}
}
if len(href) > 0 && hasIcon {
doc.Preview.Icon = href
}
}

case "meta":
Expand All @@ -221,6 +234,8 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
}
}
switch cleanStr(property) {
case "og:site_name":
doc.Preview.Name = content
case "og:title":
doc.Preview.Title = content
case "og:description":
Expand Down

0 comments on commit 42a7d35

Please sign in to comment.