Added parsing of itemtype

master
Ian Davis 2012-06-07 15:31:43 +01:00
parent a650a2c9e9
commit d607667c90
2 changed files with 87 additions and 16 deletions

View File

@ -4,6 +4,7 @@ import (
"bytes"
"code.google.com/p/go-html-transform/h5"
"io"
"strings"
)
@ -13,11 +14,13 @@ type PropertyMap map[string]ValueList
// Item is a single microdata item collected by the parser.
type Item struct {
	// properties holds the item's values keyed by property name.
	properties PropertyMap
	// types lists the tokens read from the element's itemtype attribute,
	// in the order they appear.
	types []string
}
// NewItem returns an empty Item with its property map and type list
// allocated, so callers can add to either without further setup.
func NewItem() *Item {
	return &Item{
		// Allocate the map up front: assigning into a nil map panics.
		// No size hint is given; most items carry only a few properties.
		properties: make(PropertyMap),
		types:      make([]string, 0),
	}
}
@ -64,17 +67,19 @@ func (self *Parser) scanForItem(node *h5.Node) {
return
}
hasItemscope := false
for _, a := range node.Attr {
if a.Name == "itemscope" {
hasItemscope = true
break
}
}
if hasItemscope {
if _, exists := getAttr("itemscope", node); exists {
item := NewItem()
self.data.items = append(self.data.items, item)
if itemtypes, exists := getAttr("itemtype", node); exists {
for _, itemtype := range strings.Split(strings.TrimSpace(itemtypes), " ") {
itemtype = strings.TrimSpace(itemtype)
if itemtype != "" {
item.types = append(item.types, itemtype)
}
}
}
if len(node.Children) > 0 {
@ -94,7 +99,7 @@ func (self *Parser) scanForItem(node *h5.Node) {
}
func (self *Parser) readItem(item *Item, node *h5.Node) {
if propertyName, exists := getAttr("itemprop", node); exists {
if itemprop, exists := getAttr("itemprop", node); exists {
var propertyValue string
switch node.Data() {
@ -127,7 +132,12 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
propertyValue = text.String()
}
item.SetString(propertyName, propertyValue)
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
propertyName = strings.TrimSpace(propertyName)
if propertyName != "" {
item.SetString(propertyName, propertyValue)
}
}
}
if len(node.Children) > 0 {

View File

@ -270,10 +270,13 @@ func TestReadTwoPropertiesOneValue(t *testing.T) {
</div>`
item := ReadOneItem(html, t)
if len(item.properties["favorite-color"]) != 2 {
if len(item.properties) != 2 {
t.Errorf("Expecting 2 properties but got %d",len(item.properties) )
}
if len(item.properties["favorite-color"]) != 1 {
t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) )
}
if len(item.properties["favorite-fruit"]) != 2 {
if len(item.properties["favorite-fruit"]) != 1 {
t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) )
}
if item.properties["favorite-color"][0].(string) != "orange" {
@ -282,6 +285,64 @@ func TestReadTwoPropertiesOneValue(t *testing.T) {
if item.properties["favorite-fruit"][0].(string) != "orange" {
t.Errorf("Property value 'orange' not found for 'favorite-fruit'")
}
}
// TestReadTwoPropertiesOneValueMultispaced checks that an itemprop attribute
// whose two names are padded and separated by runs of spaces still yields
// exactly two properties, each carrying the element's single text value.
func TestReadTwoPropertiesOneValueMultispaced(t *testing.T) {
	// Runs of several spaces make the tokenizer produce empty tokens, which
	// it must discard rather than record as property names.
	html := `
<div itemscope>
<span itemprop="   favorite-color    favorite-fruit   ">orange</span>
</div>`

	item := ReadOneItem(html, t)

	if len(item.properties) != 2 {
		t.Errorf("Expecting 2 properties but got %d", len(item.properties))
	}

	for _, name := range []string{"favorite-color", "favorite-fruit"} {
		values := item.properties[name]
		if len(values) != 1 {
			t.Errorf("Expecting 1 value for '%s' but got %d", name, len(values))
			// Indexing an empty list below would panic and hide this failure.
			continue
		}

		// Checked assertion: a non-string value is reported, not a panic.
		if got, ok := values[0].(string); !ok || got != "orange" {
			t.Errorf("Property value 'orange' not found for '%s'", name)
		}
	}
}
// TestReadItemType checks that a single URL in the itemtype attribute of an
// itemscope element is recorded as the item's only type.
func TestReadItemType(t *testing.T) {
	html := `
<div itemscope itemtype="http://example.org/animals#cat">
<h1 itemprop="name">Hedral</h1>
</div>`

	item := ReadOneItem(html, t)

	// Fatal rather than Error: indexing types below would panic on a short
	// slice and obscure the real failure.
	if len(item.types) != 1 {
		t.Fatalf("Expecting 1 type but got %d", len(item.types))
	}

	// The type is a string, so it is formatted with %s (the original %d
	// printed a formatting error instead of the value).
	if item.types[0] != "http://example.org/animals#cat" {
		t.Errorf("Expecting type of 'http://example.org/animals#cat' but got '%s'", item.types[0])
	}
}
// TestReadMultiplrItemTypes checks that a space-separated itemtype attribute
// with leading and trailing whitespace yields each URL as a separate type,
// in document order.
func TestReadMultiplrItemTypes(t *testing.T) {
	html := `
<div itemscope itemtype=" http://example.org/animals#mammal http://example.org/animals#cat ">
<h1 itemprop="name">Hedral</h1>
</div>`

	item := ReadOneItem(html, t)

	want := []string{
		"http://example.org/animals#mammal",
		"http://example.org/animals#cat",
	}

	// Fatal rather than Error: the loop below indexes types by position and
	// would panic on a short slice.
	if len(item.types) != len(want) {
		t.Fatalf("Expecting %d types but got %d", len(want), len(item.types))
	}

	for i, expected := range want {
		// Types are strings, so they are formatted with %s rather than %d.
		if item.types[i] != expected {
			t.Errorf("Expecting type of '%s' but got '%s'", expected, item.types[i])
		}
	}
}