дополнительные варианты поиска контента
parent
546a05f94b
commit
dc3bbfce4d
40
microdata.go
40
microdata.go
|
@ -31,7 +31,7 @@ type Item struct {
|
||||||
// NewItem creates a new microdata item
|
// NewItem creates a new microdata item
|
||||||
func NewItem() *Item {
|
func NewItem() *Item {
|
||||||
return &Item{
|
return &Item{
|
||||||
Properties: make(propertyMap, 0),
|
Properties: make(propertyMap),
|
||||||
Types: make([]string, 0),
|
Types: make([]string, 0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -104,7 +104,7 @@ func (p *Parser) Parse() (*Microdata, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
topLevelItemNodes := make([]*html.Node, 0)
|
topLevelItemNodes := make([]*html.Node, 0)
|
||||||
p.identifiedNodes = make(map[string]*html.Node, 0)
|
p.identifiedNodes = make(map[string]*html.Node)
|
||||||
|
|
||||||
walk(tree, func(n *html.Node) {
|
walk(tree, func(n *html.Node) {
|
||||||
if n.Type == html.ElementNode {
|
if n.Type == html.ElementNode {
|
||||||
|
@ -171,39 +171,47 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
var propertyValue string
|
var propertyValue *string
|
||||||
|
|
||||||
switch node.DataAtom {
|
switch node.DataAtom {
|
||||||
case atom.Meta:
|
case atom.Meta:
|
||||||
if val, exists := getAttr("content", node); exists {
|
if val, exists := getAttr("content", node); exists {
|
||||||
propertyValue = val
|
propertyValue = &val
|
||||||
}
|
}
|
||||||
case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
|
case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
|
||||||
if urlValue, exists := getAttr("src", node); exists {
|
if urlValue, exists := getAttr("src", node); exists {
|
||||||
if parsedURL, err := p.base.Parse(urlValue); err == nil {
|
if parsedURL, err := p.base.Parse(urlValue); err == nil {
|
||||||
propertyValue = parsedURL.String()
|
parsedStr := parsedURL.String()
|
||||||
|
propertyValue = &parsedStr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case atom.A, atom.Area, atom.Link:
|
case atom.A, atom.Area, atom.Link:
|
||||||
if urlValue, exists := getAttr("href", node); exists {
|
if urlValue, exists := getAttr("href", node); exists {
|
||||||
if parsedURL, err := p.base.Parse(urlValue); err == nil {
|
if parsedURL, err := p.base.Parse(urlValue); err == nil {
|
||||||
propertyValue = parsedURL.String()
|
parsedStr := parsedURL.String()
|
||||||
|
propertyValue = &parsedStr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case atom.Object:
|
case atom.Object:
|
||||||
if urlValue, exists := getAttr("data", node); exists {
|
if urlValue, exists := getAttr("data", node); exists {
|
||||||
propertyValue = urlValue
|
propertyValue = &urlValue
|
||||||
}
|
}
|
||||||
case atom.Data, atom.Meter:
|
case atom.Data, atom.Meter:
|
||||||
if urlValue, exists := getAttr("value", node); exists {
|
if urlValue, exists := getAttr("value", node); exists {
|
||||||
propertyValue = urlValue
|
propertyValue = &urlValue
|
||||||
}
|
}
|
||||||
case atom.Time:
|
case atom.Time:
|
||||||
if urlValue, exists := getAttr("datetime", node); exists {
|
if urlValue, exists := getAttr("datetime", node); exists {
|
||||||
propertyValue = urlValue
|
propertyValue = &urlValue
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
// The "content" attribute can be found on other tags besides the meta tag.
|
||||||
|
if val, ok := getAttr("content", node); ok {
|
||||||
|
propertyValue = &val
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
var text bytes.Buffer
|
var text bytes.Buffer
|
||||||
walk(node, func(n *html.Node) {
|
walk(node, func(n *html.Node) {
|
||||||
if n.Type == html.TextNode {
|
if n.Type == html.TextNode {
|
||||||
|
@ -211,14 +219,22 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item {
|
||||||
}
|
}
|
||||||
|
|
||||||
})
|
})
|
||||||
propertyValue = text.String()
|
|
||||||
|
val := text.String()
|
||||||
|
propertyValue = &val
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(propertyValue) > 0 {
|
if propertyValue == nil {
|
||||||
|
if val, ok := getAttr("content", node); ok {
|
||||||
|
propertyValue = &val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if propertyValue != nil {
|
||||||
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
|
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
|
||||||
propertyName = strings.TrimSpace(propertyName)
|
propertyName = strings.TrimSpace(propertyName)
|
||||||
if propertyName != "" {
|
if propertyName != "" {
|
||||||
item.AddString(propertyName, propertyValue)
|
item.AddString(propertyName, *propertyValue)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue