diff --git a/microdata.go b/microdata.go index f79c88c..e73eb1a 100644 --- a/microdata.go +++ b/microdata.go @@ -4,6 +4,7 @@ import ( "bytes" "code.google.com/p/go-html-transform/h5" "io" + "strings" ) @@ -13,11 +14,13 @@ type PropertyMap map[string]ValueList type Item struct { properties PropertyMap + types []string } func NewItem() *Item { return &Item{ - properties: make(PropertyMap, 10), + properties: make(PropertyMap, 0), + types: make([]string, 0), } } @@ -64,17 +67,19 @@ func (self *Parser) scanForItem(node *h5.Node) { return } - hasItemscope := false - - for _, a := range node.Attr { - if a.Name == "itemscope" { - hasItemscope = true - break - } - } - if hasItemscope { + if _, exists := getAttr("itemscope", node); exists { item := NewItem() self.data.items = append(self.data.items, item) + if itemtypes, exists := getAttr("itemtype", node); exists { + for _, itemtype := range strings.Split(strings.TrimSpace(itemtypes), " ") { + itemtype = strings.TrimSpace(itemtype) + if itemtype != "" { + item.types = append(item.types, itemtype) + } + } + + + } if len(node.Children) > 0 { @@ -94,7 +99,7 @@ func (self *Parser) scanForItem(node *h5.Node) { } func (self *Parser) readItem(item *Item, node *h5.Node) { - if propertyName, exists := getAttr("itemprop", node); exists { + if itemprop, exists := getAttr("itemprop", node); exists { var propertyValue string switch node.Data() { @@ -127,7 +132,12 @@ func (self *Parser) readItem(item *Item, node *h5.Node) { propertyValue = text.String() } - item.SetString(propertyName, propertyValue) + for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") { + propertyName = strings.TrimSpace(propertyName) + if propertyName != "" { + item.SetString(propertyName, propertyValue) + } + } } if len(node.Children) > 0 { diff --git a/microdata_test.go b/microdata_test.go index 87d0fcb..ee02c1e 100644 --- a/microdata_test.go +++ b/microdata_test.go @@ -270,10 +270,13 @@ func TestReadTwoPropertiesOneValue(t *testing.T) { ` item := ReadOneItem(html, t) - if len(item.properties["favorite-color"]) != 2 { + if len(item.properties) != 2 { + t.Errorf("Expecting 2 properties but got %d",len(item.properties) ) + } + if len(item.properties["favorite-color"]) != 1 { t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) ) } - if len(item.properties["favorite-fruit"]) != 2 { + if len(item.properties["favorite-fruit"]) != 1 { t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) ) } if item.properties["favorite-color"][0].(string) != "orange" { @@ -282,6 +285,64 @@ func TestReadTwoPropertiesOneValue(t *testing.T) { if item.properties["favorite-fruit"][0].(string) != "orange" { t.Errorf("Property value 'orange' not found for 'favorite-fruit'") } - - } + +func TestReadTwoPropertiesOneValueMultispaced(t *testing.T) { + html := ` +