Added multi-valued itemrefs

master
Ian Davis 2012-06-10 14:59:30 +01:00
parent aec670c37a
commit 5a35df7849
2 changed files with 79 additions and 72 deletions

View File

@ -7,8 +7,6 @@ import (
"strings" "strings"
) )
type ValueList []interface{} type ValueList []interface{}
type PropertyMap map[string]ValueList type PropertyMap map[string]ValueList
@ -62,7 +60,6 @@ func (self *Parser) Parse() (*Microdata, error) {
topLevelItemNodes := make([]*h5.Node, 0) topLevelItemNodes := make([]*h5.Node, 0)
self.identifiedNodes = make(map[string]*h5.Node, 0) self.identifiedNodes = make(map[string]*h5.Node, 0)
tree.Walk(func(n *h5.Node) { tree.Walk(func(n *h5.Node) {
if _, exists := getAttr("itemscope", n); exists { if _, exists := getAttr("itemscope", n); exists {
if _, exists := getAttr("itemprop", n); !exists { if _, exists := getAttr("itemprop", n); !exists {
@ -92,11 +89,15 @@ func (self *Parser) Parse() (*Microdata, error) {
} }
if itemref, exists := getAttr("itemref", node); exists { if itemrefs, exists := getAttr("itemref", node); exists {
for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
itemref = strings.TrimSpace(itemref)
if refnode, exists := self.identifiedNodes[itemref]; exists { if refnode, exists := self.identifiedNodes[itemref]; exists {
self.readItem(item, refnode) self.readItem(item, refnode)
} }
} }
}
if len(node.Children) > 0 { if len(node.Children) > 0 {
for _, child := range node.Children { for _, child := range node.Children {
@ -108,8 +109,6 @@ func (self *Parser) Parse() (*Microdata, error) {
return self.data, nil return self.data, nil
} }
func (self *Parser) readItem(item *Item, node *h5.Node) { func (self *Parser) readItem(item *Item, node *h5.Node) {
if itemprop, exists := getAttr("itemprop", node); exists { if itemprop, exists := getAttr("itemprop", node); exists {
var propertyValue string var propertyValue string
@ -152,8 +151,6 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
} }
} }
if len(node.Children) > 0 { if len(node.Children) > 0 {
for _, child := range node.Children { for _, child := range node.Children {
self.readItem(item, child) self.readItem(item, child)
@ -169,4 +166,3 @@ func getAttr(name string, node *h5.Node) (string, bool) {
} }
return "", false return "", false
} }

View File

@ -5,7 +5,6 @@ import (
"testing" "testing"
) )
func ParseData(html string, t *testing.T) *Microdata { func ParseData(html string, t *testing.T) *Microdata {
p := NewParser(strings.NewReader(html)) p := NewParser(strings.NewReader(html))
@ -40,7 +39,6 @@ func TestParse(t *testing.T) {
} }
func TestParseActuallyParses(t *testing.T) { func TestParseActuallyParses(t *testing.T) {
html := ` html := `
<div itemscope> <div itemscope>
@ -54,7 +52,6 @@ func TestParseActuallyParses(t *testing.T) {
} }
func TestParseThreeProps(t *testing.T) { func TestParseThreeProps(t *testing.T) {
html := ` html := `
<div itemscope> <div itemscope>
@ -78,7 +75,6 @@ func TestParseThreeProps(t *testing.T) {
} }
} }
func TestParseImgSrc(t *testing.T) { func TestParseImgSrc(t *testing.T) {
html := ` html := `
<div itemscope> <div itemscope>
@ -158,7 +154,6 @@ func TestParseSourceSrc(t *testing.T) {
} }
} }
func TestParseVideoSrc(t *testing.T) { func TestParseVideoSrc(t *testing.T) {
html := ` html := `
<div itemscope> <div itemscope>
@ -237,8 +232,6 @@ func TestParseTimeDatetime(t *testing.T) {
} }
} }
func TestParseTwoValues(t *testing.T) { func TestParseTwoValues(t *testing.T) {
html := ` html := `
<div itemscope> <div itemscope>
@ -260,7 +253,6 @@ func TestParseTwoValues(t *testing.T) {
t.Errorf("Property value 'Apricot sorbet' not found") t.Errorf("Property value 'Apricot sorbet' not found")
} }
} }
func TestParseTwoPropertiesOneValue(t *testing.T) { func TestParseTwoPropertiesOneValue(t *testing.T) {
@ -366,8 +358,6 @@ func TestParseItemId(t *testing.T) {
} }
} }
func TestParseItemRef(t *testing.T) { func TestParseItemRef(t *testing.T) {
html := `<body><p><figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses"> html := `<body><p><figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses">
<img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest."> <img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest.">
@ -379,7 +369,6 @@ func TestParseItemRef(t *testing.T) {
item := ParseOneItem(html, t) item := ParseOneItem(html, t)
if len(item.properties) != 3 { if len(item.properties) != 3 {
t.Errorf("Expecting 3 properties but got %d", len(item.properties)) t.Errorf("Expecting 3 properties but got %d", len(item.properties))
} }
@ -435,3 +424,25 @@ func TestParseSharedItemRef(t *testing.T) {
} }
} }
// TestParseMultiValuedItemRef checks that an itemref attribute listing more
// than one space-separated id ("a b") contributes the properties found under
// every referenced element to the referring item.
func TestParseMultiValuedItemRef(t *testing.T) {
	html := `<!DOCTYPE HTML>
<html>
<body>
<div itemscope id="amanda" itemref="a b"></div>
<p id="a">Name: <span itemprop="name">Amanda</span></p>
<p id="b">Age: <span itemprop="age">26</span></p>
</body>
</html>`

	data := ParseData(html, t)

	// Stop here when nothing was parsed: indexing items[0] below would panic
	// and hide the real failure behind an index-out-of-range trace.
	if len(data.items) == 0 {
		t.Fatalf("Expecting at least 1 item but got %d", len(data.items))
	}
	item := data.items[0]

	expected := []struct {
		property string
		value    string
	}{
		{"name", "Amanda"},
		{"age", "26"},
	}

	for _, want := range expected {
		values := item.properties[want.property]
		// A missing property yields an empty list; report it instead of
		// panicking on values[0].
		if len(values) == 0 {
			t.Errorf("Property value '%s' not found for '%s'", want.value, want.property)
			continue
		}
		// Comma-ok assertion: a non-string value is a test failure, not a panic.
		if got, ok := values[0].(string); !ok || got != want.value {
			t.Errorf("Property value '%s' not found for '%s'", want.value, want.property)
		}
	}
}