Added multi-valued itemrefs
parent
aec670c37a
commit
5a35df7849
14
microdata.go
14
microdata.go
|
@ -7,8 +7,6 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
type ValueList []interface{}
|
type ValueList []interface{}
|
||||||
type PropertyMap map[string]ValueList
|
type PropertyMap map[string]ValueList
|
||||||
|
|
||||||
|
@ -62,7 +60,6 @@ func (self *Parser) Parse() (*Microdata, error) {
|
||||||
topLevelItemNodes := make([]*h5.Node, 0)
|
topLevelItemNodes := make([]*h5.Node, 0)
|
||||||
self.identifiedNodes = make(map[string]*h5.Node, 0)
|
self.identifiedNodes = make(map[string]*h5.Node, 0)
|
||||||
|
|
||||||
|
|
||||||
tree.Walk(func(n *h5.Node) {
|
tree.Walk(func(n *h5.Node) {
|
||||||
if _, exists := getAttr("itemscope", n); exists {
|
if _, exists := getAttr("itemscope", n); exists {
|
||||||
if _, exists := getAttr("itemprop", n); !exists {
|
if _, exists := getAttr("itemprop", n); !exists {
|
||||||
|
@ -92,11 +89,15 @@ func (self *Parser) Parse() (*Microdata, error) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if itemref, exists := getAttr("itemref", node); exists {
|
if itemrefs, exists := getAttr("itemref", node); exists {
|
||||||
|
for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
|
||||||
|
itemref = strings.TrimSpace(itemref)
|
||||||
|
|
||||||
if refnode, exists := self.identifiedNodes[itemref]; exists {
|
if refnode, exists := self.identifiedNodes[itemref]; exists {
|
||||||
self.readItem(item, refnode)
|
self.readItem(item, refnode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if len(node.Children) > 0 {
|
if len(node.Children) > 0 {
|
||||||
for _, child := range node.Children {
|
for _, child := range node.Children {
|
||||||
|
@ -108,8 +109,6 @@ func (self *Parser) Parse() (*Microdata, error) {
|
||||||
return self.data, nil
|
return self.data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
func (self *Parser) readItem(item *Item, node *h5.Node) {
|
func (self *Parser) readItem(item *Item, node *h5.Node) {
|
||||||
if itemprop, exists := getAttr("itemprop", node); exists {
|
if itemprop, exists := getAttr("itemprop", node); exists {
|
||||||
var propertyValue string
|
var propertyValue string
|
||||||
|
@ -152,8 +151,6 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if len(node.Children) > 0 {
|
if len(node.Children) > 0 {
|
||||||
for _, child := range node.Children {
|
for _, child := range node.Children {
|
||||||
self.readItem(item, child)
|
self.readItem(item, child)
|
||||||
|
@ -169,4 +166,3 @@ func getAttr(name string, node *h5.Node) (string, bool) {
|
||||||
}
|
}
|
||||||
return "", false
|
return "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,6 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
func ParseData(html string, t *testing.T) *Microdata {
|
func ParseData(html string, t *testing.T) *Microdata {
|
||||||
p := NewParser(strings.NewReader(html))
|
p := NewParser(strings.NewReader(html))
|
||||||
|
|
||||||
|
@ -40,7 +39,6 @@ func TestParse(t *testing.T) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestParseActuallyParses(t *testing.T) {
|
func TestParseActuallyParses(t *testing.T) {
|
||||||
html := `
|
html := `
|
||||||
<div itemscope>
|
<div itemscope>
|
||||||
|
@ -54,7 +52,6 @@ func TestParseActuallyParses(t *testing.T) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestParseThreeProps(t *testing.T) {
|
func TestParseThreeProps(t *testing.T) {
|
||||||
html := `
|
html := `
|
||||||
<div itemscope>
|
<div itemscope>
|
||||||
|
@ -78,7 +75,6 @@ func TestParseThreeProps(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestParseImgSrc(t *testing.T) {
|
func TestParseImgSrc(t *testing.T) {
|
||||||
html := `
|
html := `
|
||||||
<div itemscope>
|
<div itemscope>
|
||||||
|
@ -158,7 +154,6 @@ func TestParseSourceSrc(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func TestParseVideoSrc(t *testing.T) {
|
func TestParseVideoSrc(t *testing.T) {
|
||||||
html := `
|
html := `
|
||||||
<div itemscope>
|
<div itemscope>
|
||||||
|
@ -237,8 +232,6 @@ func TestParseTimeDatetime(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
func TestParseTwoValues(t *testing.T) {
|
func TestParseTwoValues(t *testing.T) {
|
||||||
html := `
|
html := `
|
||||||
<div itemscope>
|
<div itemscope>
|
||||||
|
@ -260,7 +253,6 @@ func TestParseTwoValues(t *testing.T) {
|
||||||
t.Errorf("Property value 'Apricot sorbet' not found")
|
t.Errorf("Property value 'Apricot sorbet' not found")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseTwoPropertiesOneValue(t *testing.T) {
|
func TestParseTwoPropertiesOneValue(t *testing.T) {
|
||||||
|
@ -366,8 +358,6 @@ func TestParseItemId(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
func TestParseItemRef(t *testing.T) {
|
func TestParseItemRef(t *testing.T) {
|
||||||
html := `<body><p><figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses">
|
html := `<body><p><figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses">
|
||||||
<img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest.">
|
<img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest.">
|
||||||
|
@ -379,7 +369,6 @@ func TestParseItemRef(t *testing.T) {
|
||||||
|
|
||||||
item := ParseOneItem(html, t)
|
item := ParseOneItem(html, t)
|
||||||
|
|
||||||
|
|
||||||
if len(item.properties) != 3 {
|
if len(item.properties) != 3 {
|
||||||
t.Errorf("Expecting 3 properties but got %d", len(item.properties))
|
t.Errorf("Expecting 3 properties but got %d", len(item.properties))
|
||||||
}
|
}
|
||||||
|
@ -435,3 +424,25 @@ func TestParseSharedItemRef(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestParseMultiValuedItemRef parses a document in which a single itemscope
// element carries itemref="a b" (two space-separated ids) and checks that the
// first parsed item has the "name" property from the element with id "a" and
// the "age" property from the element with id "b".
func TestParseMultiValuedItemRef(t *testing.T) {
|
||||||
|
html := `<!DOCTYPE HTML>
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<div itemscope id="amanda" itemref="a b"></div>
|
||||||
|
<p id="a">Name: <span itemprop="name">Amanda</span></p>
|
||||||
|
<p id="b">Age: <span itemprop="age">26</span></p>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>`
|
||||||
|
|
||||||
|
data := ParseData(html, t)
|
||||||
|
|
||||||
|
// NOTE(review): the [0] index and the .(string) assertion below are unchecked,
// so a missing item/property or a non-string value would panic instead of
// reaching t.Errorf — consider guarding with length checks.
if data.items[0].properties["name"][0].(string) != "Amanda" {
|
||||||
|
t.Errorf("Property value 'Amanda' not found for 'name'")
|
||||||
|
}
|
||||||
|
|
||||||
|
if data.items[0].properties["age"][0].(string) != "26" {
|
||||||
|
t.Errorf("Property value '26' not found for 'age'")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue