From d607667c90da0c4f2239e92c3c8f4e57f7322aa7 Mon Sep 17 00:00:00 2001 From: Ian Davis Date: Thu, 7 Jun 2012 15:31:43 +0100 Subject: [PATCH] Added parsing of itemtype --- microdata.go | 34 ++++++++++++++--------- microdata_test.go | 69 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 87 insertions(+), 16 deletions(-) diff --git a/microdata.go b/microdata.go index f79c88c..e73eb1a 100644 --- a/microdata.go +++ b/microdata.go @@ -4,6 +4,7 @@ import ( "bytes" "code.google.com/p/go-html-transform/h5" "io" + "strings" ) @@ -13,11 +14,13 @@ type PropertyMap map[string]ValueList type Item struct { properties PropertyMap + types []string } func NewItem() *Item { return &Item{ - properties: make(PropertyMap, 10), + properties: make(PropertyMap, 0), + types: make([]string, 0), } } @@ -64,17 +67,19 @@ func (self *Parser) scanForItem(node *h5.Node) { return } - hasItemscope := false - - for _, a := range node.Attr { - if a.Name == "itemscope" { - hasItemscope = true - break - } - } - if hasItemscope { + if _, exists := getAttr("itemscope", node); exists { item := NewItem() self.data.items = append(self.data.items, item) + if itemtypes, exists := getAttr("itemtype", node); exists { + for _, itemtype := range strings.Split(strings.TrimSpace(itemtypes), " ") { + itemtype = strings.TrimSpace(itemtype) + if itemtype != "" { + item.types = append(item.types, itemtype) + } + } + + + } if len(node.Children) > 0 { @@ -94,7 +99,7 @@ func (self *Parser) scanForItem(node *h5.Node) { } func (self *Parser) readItem(item *Item, node *h5.Node) { - if propertyName, exists := getAttr("itemprop", node); exists { + if itemprop, exists := getAttr("itemprop", node); exists { var propertyValue string switch node.Data() { @@ -127,7 +132,12 @@ func (self *Parser) readItem(item *Item, node *h5.Node) { propertyValue = text.String() } - item.SetString(propertyName, propertyValue) + for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") { + propertyName = 
strings.TrimSpace(propertyName) + if propertyName != "" { + item.SetString(propertyName, propertyValue) + } + } } if len(node.Children) > 0 { diff --git a/microdata_test.go b/microdata_test.go index 87d0fcb..ee02c1e 100644 --- a/microdata_test.go +++ b/microdata_test.go @@ -270,10 +270,13 @@ func TestReadTwoPropertiesOneValue(t *testing.T) { ` item := ReadOneItem(html, t) - if len(item.properties["favorite-color"]) != 2 { + if len(item.properties) != 2 { + t.Errorf("Expecting 2 properties but got %d",len(item.properties) ) + } + if len(item.properties["favorite-color"]) != 1 { t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) ) } - if len(item.properties["favorite-fruit"]) != 2 { + if len(item.properties["favorite-fruit"]) != 1 { t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) ) } if item.properties["favorite-color"][0].(string) != "orange" { @@ -282,6 +285,64 @@ func TestReadTwoPropertiesOneValue(t *testing.T) { if item.properties["favorite-fruit"][0].(string) != "orange" { t.Errorf("Property value 'orange' not found for 'favorite-fruit'") } - - } + +func TestReadTwoPropertiesOneValueMultispaced(t *testing.T) { + html := ` +
+ orange +
` + + item := ReadOneItem(html, t) + if len(item.properties) != 2 { + t.Errorf("Expecting 2 properties but got %d",len(item.properties) ) + } + + if len(item.properties["favorite-color"]) != 1 { + t.Fatalf("Expecting 1 value but got %d", len(item.properties["favorite-color"])) + } + if len(item.properties["favorite-fruit"]) != 1 { + t.Fatalf("Expecting 1 value but got %d", len(item.properties["favorite-fruit"])) + } + if item.properties["favorite-color"][0].(string) != "orange" { + t.Errorf("Property value 'orange' not found for 'favorite-color'") + } + if item.properties["favorite-fruit"][0].(string) != "orange" { + t.Errorf("Property value 'orange' not found for 'favorite-fruit'") + } +} + +func TestReadItemType(t *testing.T) { + html := ` +
+

Hedral

+
` + + item := ReadOneItem(html, t) + if len(item.types) != 1 { + t.Fatalf("Expecting 1 type but got %d", len(item.types)) + } + + if item.types[0] != "http://example.org/animals#cat" { + t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %q", item.types[0]) + } +} + +func TestReadMultiplrItemTypes(t *testing.T) { + html := ` +
+

Hedral

+
` + + item := ReadOneItem(html, t) + if len(item.types) != 2 { + t.Fatalf("Expecting 2 types but got %d", len(item.types)) + } + + if item.types[0] != "http://example.org/animals#mammal" { + t.Errorf("Expecting type of 'http://example.org/animals#mammal' but got %q", item.types[0]) + } + if item.types[1] != "http://example.org/animals#cat" { + t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %q", item.types[1]) + } +} \ No newline at end of file