forked from ukamnya/microdata_mirror
Added multi-valued itemrefs
parent
aec670c37a
commit
5a35df7849
14
microdata.go
14
microdata.go
|
@ -7,8 +7,6 @@ import (
|
|||
"strings"
|
||||
)
|
||||
|
||||
|
||||
|
||||
// ValueList is a slice of values of arbitrary type (interface{}).
type ValueList []interface{}
|
||||
// PropertyMap maps a string key to the ValueList stored under it.
// NOTE(review): keys are presumably itemprop names — confirm against readItem.
type PropertyMap map[string]ValueList
|
||||
|
||||
|
@ -62,7 +60,6 @@ func (self *Parser) Parse() (*Microdata, error) {
|
|||
topLevelItemNodes := make([]*h5.Node, 0)
|
||||
self.identifiedNodes = make(map[string]*h5.Node, 0)
|
||||
|
||||
|
||||
tree.Walk(func(n *h5.Node) {
|
||||
if _, exists := getAttr("itemscope", n); exists {
|
||||
if _, exists := getAttr("itemprop", n); !exists {
|
||||
|
@ -92,11 +89,15 @@ func (self *Parser) Parse() (*Microdata, error) {
|
|||
|
||||
}
|
||||
|
||||
if itemref, exists := getAttr("itemref", node); exists {
|
||||
if itemrefs, exists := getAttr("itemref", node); exists {
|
||||
for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
|
||||
itemref = strings.TrimSpace(itemref)
|
||||
|
||||
if refnode, exists := self.identifiedNodes[itemref]; exists {
|
||||
self.readItem(item, refnode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(node.Children) > 0 {
|
||||
for _, child := range node.Children {
|
||||
|
@ -108,8 +109,6 @@ func (self *Parser) Parse() (*Microdata, error) {
|
|||
return self.data, nil
|
||||
}
|
||||
|
||||
|
||||
|
||||
func (self *Parser) readItem(item *Item, node *h5.Node) {
|
||||
if itemprop, exists := getAttr("itemprop", node); exists {
|
||||
var propertyValue string
|
||||
|
@ -152,8 +151,6 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
if len(node.Children) > 0 {
|
||||
for _, child := range node.Children {
|
||||
self.readItem(item, child)
|
||||
|
@ -169,4 +166,3 @@ func getAttr(name string, node *h5.Node) (string, bool) {
|
|||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
|
||||
func ParseData(html string, t *testing.T) *Microdata {
|
||||
p := NewParser(strings.NewReader(html))
|
||||
|
||||
|
@ -40,7 +39,6 @@ func TestParse(t *testing.T) {
|
|||
|
||||
}
|
||||
|
||||
|
||||
func TestParseActuallyParses(t *testing.T) {
|
||||
html := `
|
||||
<div itemscope>
|
||||
|
@ -54,7 +52,6 @@ func TestParseActuallyParses(t *testing.T) {
|
|||
|
||||
}
|
||||
|
||||
|
||||
func TestParseThreeProps(t *testing.T) {
|
||||
html := `
|
||||
<div itemscope>
|
||||
|
@ -78,7 +75,6 @@ func TestParseThreeProps(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
func TestParseImgSrc(t *testing.T) {
|
||||
html := `
|
||||
<div itemscope>
|
||||
|
@ -158,7 +154,6 @@ func TestParseSourceSrc(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
func TestParseVideoSrc(t *testing.T) {
|
||||
html := `
|
||||
<div itemscope>
|
||||
|
@ -237,8 +232,6 @@ func TestParseTimeDatetime(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
func TestParseTwoValues(t *testing.T) {
|
||||
html := `
|
||||
<div itemscope>
|
||||
|
@ -260,7 +253,6 @@ func TestParseTwoValues(t *testing.T) {
|
|||
t.Errorf("Property value 'Apricot sorbet' not found")
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
func TestParseTwoPropertiesOneValue(t *testing.T) {
|
||||
|
@ -366,8 +358,6 @@ func TestParseItemId(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
func TestParseItemRef(t *testing.T) {
|
||||
html := `<body><p><figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses">
|
||||
<img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest.">
|
||||
|
@ -379,7 +369,6 @@ func TestParseItemRef(t *testing.T) {
|
|||
|
||||
item := ParseOneItem(html, t)
|
||||
|
||||
|
||||
if len(item.properties) != 3 {
|
||||
t.Errorf("Expecting 3 properties but got %d", len(item.properties))
|
||||
}
|
||||
|
@ -435,3 +424,25 @@ func TestParseSharedItemRef(t *testing.T) {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
// TestParseMultiValuedItemRef checks that an itemref attribute listing two
// ids ("a b") contributes properties from both referenced elements to the
// item: "name" from the element with id "a" and "age" from the element with
// id "b".
func TestParseMultiValuedItemRef(t *testing.T) {
|
||||
html := `<!DOCTYPE HTML>
|
||||
<html>
|
||||
<body>
|
||||
<div itemscope id="amanda" itemref="a b"></div>
|
||||
<p id="a">Name: <span itemprop="name">Amanda</span></p>
|
||||
<p id="b">Age: <span itemprop="age">26</span></p>
|
||||
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
data := ParseData(html, t)
|
||||
|
||||
// NOTE(review): indexing [0] and the unchecked .(string) assertion will
// panic instead of reporting a test failure if the item or property is
// missing or not a string — consider checking lengths first.
if data.items[0].properties["name"][0].(string) != "Amanda" {
|
||||
t.Errorf("Property value 'Amanda' not found for 'name'")
|
||||
}
|
||||
|
||||
// Second referenced element: the "age" property must also be picked up.
if data.items[0].properties["age"][0].(string) != "26" {
|
||||
t.Errorf("Property value '26' not found for 'age'")
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue