Added parsing of itemtype

master
Ian Davis 2012-06-07 15:31:43 +01:00
parent a650a2c9e9
commit d607667c90
2 changed files with 87 additions and 16 deletions

View File

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"code.google.com/p/go-html-transform/h5" "code.google.com/p/go-html-transform/h5"
"io" "io"
"strings"
) )
@ -13,11 +14,13 @@ type PropertyMap map[string]ValueList
type Item struct { type Item struct {
properties PropertyMap properties PropertyMap
types []string
} }
func NewItem() *Item { func NewItem() *Item {
return &Item{ return &Item{
properties: make(PropertyMap, 10), properties: make(PropertyMap, 0),
types: make([]string, 0),
} }
} }
@ -64,17 +67,19 @@ func (self *Parser) scanForItem(node *h5.Node) {
return return
} }
hasItemscope := false if _, exists := getAttr("itemscope", node); exists {
for _, a := range node.Attr {
if a.Name == "itemscope" {
hasItemscope = true
break
}
}
if hasItemscope {
item := NewItem() item := NewItem()
self.data.items = append(self.data.items, item) self.data.items = append(self.data.items, item)
if itemtypes, exists := getAttr("itemtype", node); exists {
for _, itemtype := range strings.Split(strings.TrimSpace(itemtypes), " ") {
itemtype = strings.TrimSpace(itemtype)
if itemtype != "" {
item.types = append(item.types, itemtype)
}
}
}
if len(node.Children) > 0 { if len(node.Children) > 0 {
@ -94,7 +99,7 @@ func (self *Parser) scanForItem(node *h5.Node) {
} }
func (self *Parser) readItem(item *Item, node *h5.Node) { func (self *Parser) readItem(item *Item, node *h5.Node) {
if propertyName, exists := getAttr("itemprop", node); exists { if itemprop, exists := getAttr("itemprop", node); exists {
var propertyValue string var propertyValue string
switch node.Data() { switch node.Data() {
@ -127,8 +132,13 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
propertyValue = text.String() propertyValue = text.String()
} }
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
propertyName = strings.TrimSpace(propertyName)
if propertyName != "" {
item.SetString(propertyName, propertyValue) item.SetString(propertyName, propertyValue)
} }
}
}
if len(node.Children) > 0 { if len(node.Children) > 0 {
for _, child := range node.Children { for _, child := range node.Children {

View File

@ -270,10 +270,13 @@ func TestReadTwoPropertiesOneValue(t *testing.T) {
</div>` </div>`
item := ReadOneItem(html, t) item := ReadOneItem(html, t)
if len(item.properties["favorite-color"]) != 2 { if len(item.properties) != 2 {
t.Errorf("Expecting 2 properties but got %d",len(item.properties) )
}
if len(item.properties["favorite-color"]) != 1 {
t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) ) t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) )
} }
if len(item.properties["favorite-fruit"]) != 2 { if len(item.properties["favorite-fruit"]) != 1 {
t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) ) t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) )
} }
if item.properties["favorite-color"][0].(string) != "orange" { if item.properties["favorite-color"][0].(string) != "orange" {
@ -282,6 +285,64 @@ func TestReadTwoPropertiesOneValue(t *testing.T) {
if item.properties["favorite-fruit"][0].(string) != "orange" { if item.properties["favorite-fruit"][0].(string) != "orange" {
t.Errorf("Property value 'orange' not found for 'favorite-fruit'") t.Errorf("Property value 'orange' not found for 'favorite-fruit'")
} }
}
// TestReadTwoPropertiesOneValueMultispaced checks that an itemprop attribute
// padded with surrounding whitespace and naming two properties produces exactly
// two properties, each holding the single string value "orange".
func TestReadTwoPropertiesOneValueMultispaced(t *testing.T) {
	html := `
<div itemscope>
<span itemprop=" favorite-color favorite-fruit ">orange</span>
</div>`

	item := ReadOneItem(html, t)
	if len(item.properties) != 2 {
		t.Errorf("Expecting 2 properties but got %d", len(item.properties))
	}

	for _, name := range []string{"favorite-color", "favorite-fruit"} {
		values := item.properties[name]
		if len(values) != 1 {
			t.Errorf("Expecting 1 value for %q but got %d", name, len(values))
			// Skip the value check: indexing values[0] would panic when the
			// list is empty and hide the real failure.
			continue
		}
		// Comma-ok assertion so a non-string value is reported as a test
		// failure instead of panicking.
		if got, ok := values[0].(string); !ok || got != "orange" {
			t.Errorf("Property value 'orange' not found for '%s'", name)
		}
	}
}
// TestReadItemType checks that a single itemtype attribute on an itemscope
// element is recorded as the only entry in the item's types list.
func TestReadItemType(t *testing.T) {
	html := `
<div itemscope itemtype="http://example.org/animals#cat">
<h1 itemprop="name">Hedral</h1>
</div>`

	item := ReadOneItem(html, t)
	// Fatal rather than Error: the check below indexes types[0] and would
	// panic on an empty list.
	if len(item.types) != 1 {
		t.Fatalf("Expecting 1 type but got %d", len(item.types))
	}

	// item.types holds strings, so the value is printed with %q (the
	// original %d verb is invalid for a string argument).
	if item.types[0] != "http://example.org/animals#cat" {
		t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %q", item.types[0])
	}
}
func TestReadMultiplrItemTypes(t *testing.T) {
html := `
<div itemscope itemtype=" http://example.org/animals#mammal http://example.org/animals#cat ">
<h1 itemprop="name">Hedral</h1>
</div>`
item := ReadOneItem(html, t)
if len(item.types) != 2 {
t.Errorf("Expecting 2 types but got %d",len(item.types) )
}
if item.types[0] != "http://example.org/animals#mammal" {
t.Errorf("Expecting type of 'http://example.org/animals#mammal' but got %d",item.types[0])
}
if item.types[1] != "http://example.org/animals#cat" {
t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d",item.types[1])
}
} }