ensure itemscope always starts a new item
parent
a5d3d8ae37
commit
ca93c08d53
56
microdata.go
56
microdata.go
|
@ -121,8 +121,18 @@ func (p *Parser) Parse() (*Microdata, error) {
|
|||
})
|
||||
|
||||
for _, node := range topLevelItemNodes {
|
||||
item := NewItem()
|
||||
p.data.Items = append(p.data.Items, item)
|
||||
p.data.Items = append(p.data.Items, p.readItem(nil, node))
|
||||
}
|
||||
|
||||
return p.data, nil
|
||||
}
|
||||
|
||||
func (p *Parser) readItem(item *Item, node *html.Node) *Item {
|
||||
var parent *Item
|
||||
|
||||
if _, exists := getAttr("itemscope", node); exists {
|
||||
parent, item = item, NewItem()
|
||||
|
||||
if itemtypes, exists := getAttr("itemtype", node); exists {
|
||||
for _, itemtype := range strings.Split(strings.TrimSpace(itemtypes), " ") {
|
||||
itemtype = strings.TrimSpace(itemtype)
|
||||
|
@ -143,53 +153,24 @@ func (p *Parser) Parse() (*Microdata, error) {
|
|||
itemref = strings.TrimSpace(itemref)
|
||||
|
||||
if refnode, exists := p.identifiedNodes[itemref]; exists {
|
||||
if refnode != node {
|
||||
p.readItem(item, refnode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; {
|
||||
p.readItem(item, child)
|
||||
child = child.NextSibling
|
||||
}
|
||||
}
|
||||
|
||||
return p.data, nil
|
||||
}
|
||||
|
||||
func (p *Parser) readItem(item *Item, node *html.Node) {
|
||||
if itemprop, exists := getAttr("itemprop", node); exists {
|
||||
if _, exists := getAttr("itemscope", node); exists {
|
||||
subitem := NewItem()
|
||||
|
||||
if itemrefs, exists := getAttr("itemref", node); exists {
|
||||
for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
|
||||
itemref = strings.TrimSpace(itemref)
|
||||
|
||||
if refnode, exists := p.identifiedNodes[itemref]; exists {
|
||||
if refnode != node {
|
||||
p.readItem(subitem, refnode)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; {
|
||||
p.readItem(subitem, child)
|
||||
child = child.NextSibling
|
||||
}
|
||||
|
||||
if parent != nil {
|
||||
// an itemprop on an itemscope has value of the item created by the itemscope
|
||||
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
|
||||
propertyName = strings.TrimSpace(propertyName)
|
||||
if propertyName != "" {
|
||||
item.AddItem(propertyName, subitem)
|
||||
parent.AddItem(propertyName, item)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
}
|
||||
|
||||
} else {
|
||||
var propertyValue string
|
||||
|
||||
switch node.DataAtom {
|
||||
|
@ -241,7 +222,7 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; {
|
||||
|
@ -249,6 +230,7 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
|
|||
child = child.NextSibling
|
||||
}
|
||||
|
||||
return item
|
||||
}
|
||||
|
||||
func getAttr(name string, node *html.Node) (string, bool) {
|
||||
|
|
|
@ -583,8 +583,9 @@ func TestSkipSelfReferencingItemref(t *testing.T) {
|
|||
actual := ParseData(html, t)
|
||||
|
||||
child := NewItem()
|
||||
child.AddString("title", "Foo")
|
||||
child.AddType("http://data-vocabulary.org/Breadcrumb")
|
||||
child.AddString("url", "http://example.com/foo/bar")
|
||||
child.AddString("title", "Foo")
|
||||
|
||||
item := NewItem()
|
||||
item.AddType("http://schema.org/WebPage")
|
||||
|
@ -609,11 +610,9 @@ func TestPropertiesInContainedItem(t *testing.T) {
|
|||
<div itemscope itemtype="http://schema.org/Person">
|
||||
<meta itemprop="bar" content="bar value">
|
||||
</div>
|
||||
|
||||
<div itemscope itemtype="http://schema.org/Person" itemprop="author">
|
||||
<meta itemprop="baz" content="baz value">
|
||||
</div>
|
||||
|
||||
</body>`
|
||||
|
||||
actual := ParseData(html, t)
|
||||
|
|
Loading…
Reference in New Issue