Added parsing of itemtype
This commit is contained in:
		
							parent
							
								
									a650a2c9e9
								
							
						
					
					
						commit
						d607667c90
					
				
							
								
								
									
										34
									
								
								microdata.go
									
									
									
									
									
								
							
							
						
						
									
										34
									
								
								microdata.go
									
									
									
									
									
								
							@ -4,6 +4,7 @@ import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"code.google.com/p/go-html-transform/h5"
 | 
			
		||||
	"io"
 | 
			
		||||
	"strings"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -13,11 +14,13 @@ type PropertyMap map[string]ValueList
 | 
			
		||||
 | 
			
		||||
type Item struct {
 | 
			
		||||
	properties PropertyMap
 | 
			
		||||
	types []string
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func NewItem() *Item {
 | 
			
		||||
	return &Item{
 | 
			
		||||
		properties: make(PropertyMap, 10),
 | 
			
		||||
		properties: make(PropertyMap, 0),
 | 
			
		||||
		types: make([]string, 0),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -64,17 +67,19 @@ func (self *Parser) scanForItem(node *h5.Node) {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	hasItemscope := false
 | 
			
		||||
 | 
			
		||||
	for _, a := range node.Attr {
 | 
			
		||||
		if a.Name == "itemscope" {
 | 
			
		||||
			hasItemscope = true
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if hasItemscope {
 | 
			
		||||
	if _, exists := getAttr("itemscope", node); exists {
 | 
			
		||||
		item := NewItem()
 | 
			
		||||
		self.data.items = append(self.data.items, item)
 | 
			
		||||
		if itemtypes, exists := getAttr("itemtype", node); exists {
 | 
			
		||||
			for _, itemtype := range strings.Split(strings.TrimSpace(itemtypes), " ") {
 | 
			
		||||
				itemtype = strings.TrimSpace(itemtype)
 | 
			
		||||
				if itemtype != "" {
 | 
			
		||||
					item.types = append(item.types, itemtype)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			
 | 
			
		||||
		} 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		if len(node.Children) > 0 {
 | 
			
		||||
@ -94,7 +99,7 @@ func (self *Parser) scanForItem(node *h5.Node) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (self *Parser) readItem(item *Item, node *h5.Node) {
 | 
			
		||||
	if propertyName, exists := getAttr("itemprop", node); exists {
 | 
			
		||||
	if itemprop, exists := getAttr("itemprop", node); exists {
 | 
			
		||||
		var propertyValue string
 | 
			
		||||
		
 | 
			
		||||
		switch node.Data() {
 | 
			
		||||
@ -127,7 +132,12 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
 | 
			
		||||
			propertyValue = text.String()
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		item.SetString(propertyName, propertyValue)
 | 
			
		||||
		for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
 | 
			
		||||
			propertyName = strings.TrimSpace(propertyName)
 | 
			
		||||
			if propertyName != "" {
 | 
			
		||||
				item.SetString(propertyName, propertyValue)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(node.Children) > 0 {
 | 
			
		||||
 | 
			
		||||
@ -270,10 +270,13 @@ func TestReadTwoPropertiesOneValue(t *testing.T) {
 | 
			
		||||
	</div>`
 | 
			
		||||
 | 
			
		||||
	item := ReadOneItem(html, t)
 | 
			
		||||
	if len(item.properties["favorite-color"]) != 2 {
 | 
			
		||||
	if len(item.properties) != 2 {
 | 
			
		||||
		t.Errorf("Expecting 2 properties but got %d",len(item.properties) )
 | 
			
		||||
	}
 | 
			
		||||
	if len(item.properties["favorite-color"]) != 1 {
 | 
			
		||||
		t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) )
 | 
			
		||||
	}
 | 
			
		||||
	if len(item.properties["favorite-fruit"]) != 2 {
 | 
			
		||||
	if len(item.properties["favorite-fruit"]) != 1 {
 | 
			
		||||
		t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) )
 | 
			
		||||
	}
 | 
			
		||||
	if item.properties["favorite-color"][0].(string) != "orange" {
 | 
			
		||||
@ -282,6 +285,64 @@ func TestReadTwoPropertiesOneValue(t *testing.T) {
 | 
			
		||||
	if item.properties["favorite-fruit"][0].(string) != "orange" {
 | 
			
		||||
		t.Errorf("Property value 'orange' not found for 'favorite-fruit'")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestReadTwoPropertiesOneValueMultispaced(t *testing.T) {
 | 
			
		||||
	html := `
 | 
			
		||||
	<div itemscope>
 | 
			
		||||
	 <span itemprop="   favorite-color    favorite-fruit   ">orange</span>
 | 
			
		||||
	</div>`
 | 
			
		||||
 | 
			
		||||
	item := ReadOneItem(html, t)
 | 
			
		||||
	if len(item.properties) != 2 {
 | 
			
		||||
		t.Errorf("Expecting 2 properties but got %d",len(item.properties) )
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(item.properties["favorite-color"]) != 1 {
 | 
			
		||||
		t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-color"]) )
 | 
			
		||||
	}
 | 
			
		||||
	if len(item.properties["favorite-fruit"]) != 1 {
 | 
			
		||||
		t.Errorf("Expecting 1 value but got %d",len(item.properties["favorite-fruit"]) )
 | 
			
		||||
	}
 | 
			
		||||
	if item.properties["favorite-color"][0].(string) != "orange" {
 | 
			
		||||
		t.Errorf("Property value 'orange' not found for 'favorite-color'")
 | 
			
		||||
	}
 | 
			
		||||
	if item.properties["favorite-fruit"][0].(string) != "orange" {
 | 
			
		||||
		t.Errorf("Property value 'orange' not found for 'favorite-fruit'")
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestReadItemType(t *testing.T) {
 | 
			
		||||
	html := `
 | 
			
		||||
	<div itemscope itemtype="http://example.org/animals#cat">
 | 
			
		||||
 		<h1 itemprop="name">Hedral</h1>
 | 
			
		||||
	</div>`
 | 
			
		||||
 | 
			
		||||
	item := ReadOneItem(html, t)
 | 
			
		||||
	if len(item.types) != 1 {
 | 
			
		||||
		t.Errorf("Expecting 1 type but got %d",len(item.types) )	
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if item.types[0] != "http://example.org/animals#cat" {
 | 
			
		||||
		t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d",item.types[0]) 
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestReadMultiplrItemTypes(t *testing.T) {
 | 
			
		||||
	html := `
 | 
			
		||||
	<div itemscope itemtype=" http://example.org/animals#mammal  http://example.org/animals#cat  ">
 | 
			
		||||
 		<h1 itemprop="name">Hedral</h1>
 | 
			
		||||
	</div>`
 | 
			
		||||
 | 
			
		||||
	item := ReadOneItem(html, t)
 | 
			
		||||
	if len(item.types) != 2 {
 | 
			
		||||
		t.Errorf("Expecting 2 types but got %d",len(item.types) )	
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if item.types[0] != "http://example.org/animals#mammal" {
 | 
			
		||||
		t.Errorf("Expecting type of 'http://example.org/animals#mammal' but got %d",item.types[0]) 
 | 
			
		||||
	}
 | 
			
		||||
	if item.types[1] != "http://example.org/animals#cat" {
 | 
			
		||||
		t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d",item.types[1]) 
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user