From dc3bbfce4df7734ec2c140d52705e439adb443b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D0=BE=D0=BC=D0=B0=D0=BD=20=D0=91=D0=BE=D1=80=D0=BE?= =?UTF-8?q?=D0=B4=D0=B8=D0=BD?= Date: Wed, 15 May 2024 16:35:31 +0300 Subject: [PATCH] =?UTF-8?q?=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB=D0=BD=D0=B8?= =?UTF-8?q?=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B5=20=D0=B2=D0=B0=D1=80?= =?UTF-8?q?=D0=B8=D0=B0=D0=BD=D1=82=D1=8B=20=D0=BF=D0=BE=D0=B8=D1=81=D0=BA?= =?UTF-8?q?=D0=B0=20=D0=BA=D0=BE=D0=BD=D1=82=D0=B5=D0=BD=D1=82=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- microdata.go | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/microdata.go b/microdata.go index 329f019..c2b0022 100644 --- a/microdata.go +++ b/microdata.go @@ -31,7 +31,7 @@ type Item struct { // NewItem creates a new microdata item func NewItem() *Item { return &Item{ - Properties: make(propertyMap, 0), + Properties: make(propertyMap), Types: make([]string, 0), } } @@ -104,7 +104,7 @@ func (p *Parser) Parse() (*Microdata, error) { } topLevelItemNodes := make([]*html.Node, 0) - p.identifiedNodes = make(map[string]*html.Node, 0) + p.identifiedNodes = make(map[string]*html.Node) walk(tree, func(n *html.Node) { if n.Type == html.ElementNode { @@ -171,39 +171,47 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item { } } } else { - var propertyValue string + var propertyValue *string switch node.DataAtom { case atom.Meta: if val, exists := getAttr("content", node); exists { - propertyValue = val + propertyValue = &val } case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video: if urlValue, exists := getAttr("src", node); exists { if parsedURL, err := p.base.Parse(urlValue); err == nil { - propertyValue = parsedURL.String() + parsedStr := parsedURL.String() + propertyValue = &parsedStr } } case atom.A, atom.Area, atom.Link: if urlValue, exists := getAttr("href", node); exists { if parsedURL, err := p.base.Parse(urlValue); err == nil { - propertyValue = parsedURL.String() + parsedStr := parsedURL.String() + propertyValue = &parsedStr } } case atom.Object: if urlValue, exists := getAttr("data", node); exists { - propertyValue = urlValue + propertyValue = &urlValue } case atom.Data, atom.Meter: if urlValue, exists := getAttr("value", node); exists { - propertyValue = urlValue + propertyValue = &urlValue } case atom.Time: if urlValue, exists := getAttr("datetime", node); exists { - propertyValue = urlValue + propertyValue = &urlValue } default: + // The "content" attribute can be found on other tags besides the meta tag. + if val, ok := getAttr("content", node); ok { + propertyValue = &val + break + } + var text bytes.Buffer walk(node, func(n *html.Node) { if n.Type == html.TextNode { @@ -211,14 +219,22 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item { } }) - propertyValue = text.String() + + val := text.String() + propertyValue = &val } - if len(propertyValue) > 0 { + if propertyValue == nil { + if val, ok := getAttr("content", node); ok { + propertyValue = &val + } + } + + if propertyValue != nil { for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") { propertyName = strings.TrimSpace(propertyName) if propertyName != "" { - item.AddString(propertyName, propertyValue) + item.AddString(propertyName, *propertyValue) } } }