Added multi-valued itemrefs

master
Ian Davis 2012-06-10 14:59:30 +01:00
parent aec670c37a
commit 5a35df7849
2 changed files with 79 additions and 72 deletions

View File

@ -7,8 +7,6 @@ import (
"strings"
)
type (
	// ValueList is a slice holding values of arbitrary type.
	ValueList []interface{}

	// PropertyMap associates a string key (presumably a property name;
	// confirm against the Item definition) with the list of values
	// recorded under that key.
	PropertyMap map[string]ValueList
)
@ -62,7 +60,6 @@ func (self *Parser) Parse() (*Microdata, error) {
topLevelItemNodes := make([]*h5.Node, 0)
self.identifiedNodes = make(map[string]*h5.Node, 0)
tree.Walk(func(n *h5.Node) {
if _, exists := getAttr("itemscope", n); exists {
if _, exists := getAttr("itemprop", n); !exists {
@ -92,11 +89,15 @@ func (self *Parser) Parse() (*Microdata, error) {
}
if itemref, exists := getAttr("itemref", node); exists {
if itemrefs, exists := getAttr("itemref", node); exists {
for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
itemref = strings.TrimSpace(itemref)
if refnode, exists := self.identifiedNodes[itemref]; exists {
self.readItem(item, refnode)
}
}
}
if len(node.Children) > 0 {
for _, child := range node.Children {
@ -108,8 +109,6 @@ func (self *Parser) Parse() (*Microdata, error) {
return self.data, nil
}
func (self *Parser) readItem(item *Item, node *h5.Node) {
if itemprop, exists := getAttr("itemprop", node); exists {
var propertyValue string
@ -152,8 +151,6 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
}
}
if len(node.Children) > 0 {
for _, child := range node.Children {
self.readItem(item, child)
@ -169,4 +166,3 @@ func getAttr(name string, node *h5.Node) (string, bool) {
}
return "", false
}

View File

@ -5,7 +5,6 @@ import (
"testing"
)
func ParseData(html string, t *testing.T) *Microdata {
p := NewParser(strings.NewReader(html))
@ -40,7 +39,6 @@ func TestParse(t *testing.T) {
}
func TestParseActuallyParses(t *testing.T) {
html := `
<div itemscope>
@ -54,7 +52,6 @@ func TestParseActuallyParses(t *testing.T) {
}
func TestParseThreeProps(t *testing.T) {
html := `
<div itemscope>
@ -78,7 +75,6 @@ func TestParseThreeProps(t *testing.T) {
}
}
func TestParseImgSrc(t *testing.T) {
html := `
<div itemscope>
@ -158,7 +154,6 @@ func TestParseSourceSrc(t *testing.T) {
}
}
func TestParseVideoSrc(t *testing.T) {
html := `
<div itemscope>
@ -237,8 +232,6 @@ func TestParseTimeDatetime(t *testing.T) {
}
}
func TestParseTwoValues(t *testing.T) {
html := `
<div itemscope>
@ -260,7 +253,6 @@ func TestParseTwoValues(t *testing.T) {
t.Errorf("Property value 'Apricot sorbet' not found")
}
}
func TestParseTwoPropertiesOneValue(t *testing.T) {
@ -366,8 +358,6 @@ func TestParseItemId(t *testing.T) {
}
}
func TestParseItemRef(t *testing.T) {
html := `<body><p><figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses">
<img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest.">
@ -379,7 +369,6 @@ func TestParseItemRef(t *testing.T) {
item := ParseOneItem(html, t)
if len(item.properties) != 3 {
t.Errorf("Expecting 3 properties but got %d", len(item.properties))
}
@ -435,3 +424,25 @@ func TestParseSharedItemRef(t *testing.T) {
}
}
// TestParseMultiValuedItemRef checks that an itemref attribute listing
// several space-separated ids ("a b") contributes the properties found under
// each referenced element to the item.
func TestParseMultiValuedItemRef(t *testing.T) {
	html := `<!DOCTYPE HTML>
<html>
<body>
<div itemscope id="amanda" itemref="a b"></div>
<p id="a">Name: <span itemprop="name">Amanda</span></p>
<p id="b">Age: <span itemprop="age">26</span></p>
</body>
</html>`

	data := ParseData(html, t)
	// Stop here when nothing was parsed: indexing items[0] below would
	// otherwise panic instead of reporting a test failure.
	if data == nil || len(data.items) == 0 {
		t.Fatalf("Expecting at least 1 item but got none")
	}
	props := data.items[0].properties

	expected := []struct {
		name  string
		value string
	}{
		{"name", "Amanda"},
		{"age", "26"},
	}
	for _, want := range expected {
		values := props[want.name]
		// A missing property yields an empty list; check the length and use
		// the two-value type assertion so a regression is reported through
		// t.Errorf rather than as an index or type-assertion panic.
		if len(values) == 0 {
			t.Errorf("Property value '%s' not found for '%s'", want.value, want.name)
			continue
		}
		if got, ok := values[0].(string); !ok || got != want.value {
			t.Errorf("Property value '%s' not found for '%s'", want.value, want.name)
		}
	}
}