diff --git a/microdata.go b/microdata.go
index a42d024..3d7d706 100644
--- a/microdata.go
+++ b/microdata.go
@@ -7,21 +7,19 @@ import (
"strings"
)
-
-
type ValueList []interface{}
type PropertyMap map[string]ValueList
type Item struct {
properties PropertyMap
- types []string
- id string
+ types []string
+ id string
}
func NewItem() *Item {
return &Item{
properties: make(PropertyMap, 0),
- types: make([]string, 0),
+ types: make([]string, 0),
}
}
@@ -40,14 +38,14 @@ func NewMicrodata() *Microdata {
}
type Parser struct {
- p *h5.Parser
- data *Microdata
+ p *h5.Parser
+ data *Microdata
identifiedNodes map[string]*h5.Node
}
func NewParser(r io.Reader) *Parser {
- return &Parser {
- p : h5.NewParser(r),
+ return &Parser{
+ p: h5.NewParser(r),
data: NewMicrodata(),
}
}
@@ -62,8 +60,7 @@ func (self *Parser) Parse() (*Microdata, error) {
topLevelItemNodes := make([]*h5.Node, 0)
self.identifiedNodes = make(map[string]*h5.Node, 0)
-
- tree.Walk( func(n *h5.Node) {
+ tree.Walk(func(n *h5.Node) {
if _, exists := getAttr("itemscope", n); exists {
if _, exists := getAttr("itemprop", n); !exists {
topLevelItemNodes = append(topLevelItemNodes, n)
@@ -73,7 +70,7 @@ func (self *Parser) Parse() (*Microdata, error) {
if id, exists := getAttr("id", n); exists {
self.identifiedNodes[id] = n
}
- })
+ })
for _, node := range topLevelItemNodes {
item := NewItem()
@@ -86,62 +83,64 @@ func (self *Parser) Parse() (*Microdata, error) {
}
}
// itemid only valid when itemscope and itemtype are both present
- if itemid, exists := getAttr("itemid", node); exists {
+ if itemid, exists := getAttr("itemid", node); exists {
item.id = strings.TrimSpace(itemid)
}
-
- }
- if itemref, exists := getAttr("itemref", node); exists {
- if refnode, exists := self.identifiedNodes[itemref]; exists {
- self.readItem(item, refnode)
+ }
+
+ if itemrefs, exists := getAttr("itemref", node); exists {
+ for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
+ itemref = strings.TrimSpace(itemref)
+
+ if refnode, exists := self.identifiedNodes[itemref]; exists {
+ self.readItem(item, refnode)
+ }
}
}
if len(node.Children) > 0 {
- for _, child := range node.Children {
- self.readItem(item, child)
- }
- }
+ for _, child := range node.Children {
+ self.readItem(item, child)
+ }
+ }
}
return self.data, nil
}
-
-
func (self *Parser) readItem(item *Item, node *h5.Node) {
if itemprop, exists := getAttr("itemprop", node); exists {
var propertyValue string
-
+
switch node.Data() {
- case "img","audio", "source", "video", "embed", "iframe", "track":
+ case "img", "audio", "source", "video", "embed", "iframe", "track":
if urlValue, exists := getAttr("src", node); exists {
propertyValue = urlValue
- }
+ }
case "a", "area", "link":
if urlValue, exists := getAttr("href", node); exists {
propertyValue = urlValue
- }
+ }
case "data":
if urlValue, exists := getAttr("value", node); exists {
propertyValue = urlValue
- }
+ }
case "time":
if urlValue, exists := getAttr("datetime", node); exists {
propertyValue = urlValue
- }
+ }
default:
var text bytes.Buffer
- node.Walk( func(n *h5.Node) {
- if n.Type == h5.TextNode {
- text.WriteString(n.Data())
- }
+ node.Walk(func(n *h5.Node) {
+ if n.Type == h5.TextNode {
+ text.WriteString(n.Data())
+ }
- })
- propertyValue = text.String()
+ })
+ propertyValue = text.String()
}
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
@@ -152,13 +151,11 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
}
}
-
-
if len(node.Children) > 0 {
- for _, child := range node.Children {
- self.readItem(item, child)
- }
- }
+ for _, child := range node.Children {
+ self.readItem(item, child)
+ }
+ }
}
func getAttr(name string, node *h5.Node) (string, bool) {
@@ -169,4 +166,3 @@ func getAttr(name string, node *h5.Node) (string, bool) {
}
return "", false
}
-
diff --git a/microdata_test.go b/microdata_test.go
index e0502f3..380e6b1 100644
--- a/microdata_test.go
+++ b/microdata_test.go
@@ -5,7 +5,6 @@ import (
"testing"
)
-
func ParseData(html string, t *testing.T) *Microdata {
p := NewParser(strings.NewReader(html))
@@ -40,7 +39,6 @@ func TestParse(t *testing.T) {
}
-
func TestParseActuallyParses(t *testing.T) {
html := `
@@ -54,7 +52,6 @@ func TestParseActuallyParses(t *testing.T) {
}
-
func TestParseThreeProps(t *testing.T) {
html := `
@@ -78,7 +75,6 @@ func TestParseThreeProps(t *testing.T) {
}
}
-
func TestParseImgSrc(t *testing.T) {
html := `
@@ -158,7 +154,6 @@ func TestParseSourceSrc(t *testing.T) {
}
}
-
func TestParseVideoSrc(t *testing.T) {
html := `
@@ -237,8 +232,6 @@ func TestParseTimeDatetime(t *testing.T) {
}
}
-
-
func TestParseTwoValues(t *testing.T) {
html := `
@@ -251,7 +244,7 @@ func TestParseTwoValues(t *testing.T) {
item := ParseOneItem(html, t)
if len(item.properties["flavor"]) != 2 {
- t.Errorf("Expecting 2 values but got %d",len(item.properties["flavor"]) )
+ t.Errorf("Expecting 2 values but got %d", len(item.properties["flavor"]))
}
if item.properties["flavor"][0].(string) != "Lemon sorbet" {
t.Errorf("Property value 'Lemon sorbet' not found")
@@ -260,7 +253,6 @@ func TestParseTwoValues(t *testing.T) {
t.Errorf("Property value 'Apricot sorbet' not found")
}
-
}
func TestParseTwoPropertiesOneValue(t *testing.T) {
@@ -271,13 +263,13 @@ func TestParseTwoPropertiesOneValue(t *testing.T) {
item := ParseOneItem(html, t)
if len(item.properties) != 2 {
- t.Errorf("Expecting 2 properties but got %d",len(item.properties) )
+ t.Errorf("Expecting 2 properties but got %d", len(item.properties))
}
if len(item.properties["favorite-color"]) != 1 {
- t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) )
+ t.Errorf("Expecting 1 value but got %d", len(item.properties["favorite-color"]))
}
if len(item.properties["favorite-fruit"]) != 1 {
- t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) )
+ t.Errorf("Expecting 1 value but got %d", len(item.properties["favorite-fruit"]))
}
if item.properties["favorite-color"][0].(string) != "orange" {
t.Errorf("Property value 'orange' not found for 'favorite-color'")
@@ -295,14 +287,14 @@ func TestParseTwoPropertiesOneValueMultispaced(t *testing.T) {
item := ParseOneItem(html, t)
if len(item.properties) != 2 {
- t.Errorf("Expecting 2 properties but got %d",len(item.properties) )
+ t.Errorf("Expecting 2 properties but got %d", len(item.properties))
}
if len(item.properties["favorite-color"]) != 1 {
- t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) )
+ t.Errorf("Expecting 1 value but got %d", len(item.properties["favorite-color"]))
}
if len(item.properties["favorite-fruit"]) != 1 {
- t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) )
+ t.Errorf("Expecting 1 value but got %d", len(item.properties["favorite-fruit"]))
}
if item.properties["favorite-color"][0].(string) != "orange" {
t.Errorf("Property value 'orange' not found for 'favorite-color'")
@@ -320,11 +312,11 @@ func TestParseItemType(t *testing.T) {
item := ParseOneItem(html, t)
if len(item.types) != 1 {
- t.Errorf("Expecting 1 type but got %d",len(item.types) )
+ t.Errorf("Expecting 1 type but got %d", len(item.types))
}
if item.types[0] != "http://example.org/animals#cat" {
- t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d",item.types[0])
+ t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d", item.types[0])
}
}
@@ -336,14 +328,14 @@ func TestParseMultipleItemTypes(t *testing.T) {
item := ParseOneItem(html, t)
if len(item.types) != 2 {
- t.Errorf("Expecting 2 types but got %d",len(item.types) )
+ t.Errorf("Expecting 2 types but got %d", len(item.types))
}
if item.types[0] != "http://example.org/animals#mammal" {
- t.Errorf("Expecting type of 'http://example.org/animals#mammal' but got %d",item.types[0])
+ t.Errorf("Expecting type of 'http://example.org/animals#mammal' but got %d", item.types[0])
}
if item.types[1] != "http://example.org/animals#cat" {
- t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d",item.types[1])
+ t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d", item.types[1])
}
}
@@ -362,12 +354,10 @@ func TestParseItemId(t *testing.T) {
item := ParseOneItem(html, t)
if item.id != "urn:isbn:0-330-34032-8" {
- t.Errorf("Expecting id of 'urn:isbn:0-330-34032-8' but got %d",item.id)
+ t.Errorf("Expecting id of 'urn:isbn:0-330-34032-8' but got %d", item.id)
}
}
-
-
func TestParseItemRef(t *testing.T) {
html := `