forked from ukamnya/microdata_mirror
		
	Added subitems
This commit is contained in:
		
							parent
							
								
									a4244ac31d
								
							
						
					
					
						commit
						cc14cf6b0c
					
				
							
								
								
									
										100
									
								
								microdata.go
									
									
									
									
									
								
							
							
						
						
									
										100
									
								
								microdata.go
									
									
									
									
									
								
							@ -27,6 +27,11 @@ func (self *Item) SetString(property string, value string) {
 | 
			
		||||
	self.properties[property] = append(self.properties[property], value)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (self *Item) SetItem(property string, value *Item) {
 | 
			
		||||
	self.properties[property] = append(self.properties[property], value)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
type Microdata struct {
 | 
			
		||||
	items []*Item
 | 
			
		||||
}
 | 
			
		||||
@ -111,44 +116,78 @@ func (self *Parser) Parse() (*Microdata, error) {
 | 
			
		||||
 | 
			
		||||
func (self *Parser) readItem(item *Item, node *h5.Node) {
 | 
			
		||||
	if itemprop, exists := getAttr("itemprop", node); exists {
 | 
			
		||||
		var propertyValue string
 | 
			
		||||
		if _, exists := getAttr("itemscope", node); exists {
 | 
			
		||||
			subitem := NewItem()
 | 
			
		||||
 | 
			
		||||
		switch node.Data() {
 | 
			
		||||
			if itemrefs, exists := getAttr("itemref", node); exists {
 | 
			
		||||
				for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
 | 
			
		||||
					itemref = strings.TrimSpace(itemref)
 | 
			
		||||
 | 
			
		||||
		case "img", "audio", "source", "video", "embed", "iframe", "track":
 | 
			
		||||
			if urlValue, exists := getAttr("src", node); exists {
 | 
			
		||||
				propertyValue = urlValue
 | 
			
		||||
			}
 | 
			
		||||
		case "a", "area", "link":
 | 
			
		||||
			if urlValue, exists := getAttr("href", node); exists {
 | 
			
		||||
				propertyValue = urlValue
 | 
			
		||||
			}
 | 
			
		||||
		case "data":
 | 
			
		||||
			if urlValue, exists := getAttr("value", node); exists {
 | 
			
		||||
				propertyValue = urlValue
 | 
			
		||||
			}
 | 
			
		||||
		case "time":
 | 
			
		||||
			if urlValue, exists := getAttr("datetime", node); exists {
 | 
			
		||||
				propertyValue = urlValue
 | 
			
		||||
					if refnode, exists := self.identifiedNodes[itemref]; exists {
 | 
			
		||||
						self.readItem(subitem, refnode)
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
		default:
 | 
			
		||||
			var text bytes.Buffer
 | 
			
		||||
			node.Walk(func(n *h5.Node) {
 | 
			
		||||
				if n.Type == h5.TextNode {
 | 
			
		||||
					text.WriteString(n.Data())
 | 
			
		||||
			if len(node.Children) > 0 {
 | 
			
		||||
				for _, child := range node.Children {
 | 
			
		||||
					self.readItem(subitem, child)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
 | 
			
		||||
				propertyName = strings.TrimSpace(propertyName)
 | 
			
		||||
				if propertyName != "" {
 | 
			
		||||
					item.SetItem(propertyName, subitem)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			return
 | 
			
		||||
 | 
			
		||||
		} else {
 | 
			
		||||
			var propertyValue string
 | 
			
		||||
 | 
			
		||||
			switch node.Data() {
 | 
			
		||||
 | 
			
		||||
			case "img", "audio", "source", "video", "embed", "iframe", "track":
 | 
			
		||||
				if urlValue, exists := getAttr("src", node); exists {
 | 
			
		||||
					propertyValue = urlValue
 | 
			
		||||
				}
 | 
			
		||||
			case "a", "area", "link":
 | 
			
		||||
				if urlValue, exists := getAttr("href", node); exists {
 | 
			
		||||
					propertyValue = urlValue
 | 
			
		||||
				}
 | 
			
		||||
			case "data":
 | 
			
		||||
				if urlValue, exists := getAttr("value", node); exists {
 | 
			
		||||
					propertyValue = urlValue
 | 
			
		||||
				}
 | 
			
		||||
			case "time":
 | 
			
		||||
				if urlValue, exists := getAttr("datetime", node); exists {
 | 
			
		||||
					propertyValue = urlValue
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
			})
 | 
			
		||||
			propertyValue = text.String()
 | 
			
		||||
			default:
 | 
			
		||||
				var text bytes.Buffer
 | 
			
		||||
				node.Walk(func(n *h5.Node) {
 | 
			
		||||
					if n.Type == h5.TextNode {
 | 
			
		||||
						text.WriteString(n.Data())
 | 
			
		||||
					}
 | 
			
		||||
 | 
			
		||||
				})
 | 
			
		||||
				propertyValue = text.String()
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
 | 
			
		||||
				propertyName = strings.TrimSpace(propertyName)
 | 
			
		||||
				if propertyName != "" {
 | 
			
		||||
					item.SetString(propertyName, propertyValue)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
 | 
			
		||||
			propertyName = strings.TrimSpace(propertyName)
 | 
			
		||||
			if propertyName != "" {
 | 
			
		||||
				item.SetString(propertyName, propertyValue)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(node.Children) > 0 {
 | 
			
		||||
@ -156,6 +195,7 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
 | 
			
		||||
			self.readItem(item, child)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func getAttr(name string, node *h5.Node) (string, bool) {
 | 
			
		||||
 | 
			
		||||
@ -465,7 +465,35 @@ func TestParseEmbeddedItem(t *testing.T) {
 | 
			
		||||
		t.Errorf("Property value 'Amanda' not found for 'name'")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	subitem := data.items[0].properties["band"][0].(Item)
 | 
			
		||||
	subitem := data.items[0].properties["band"][0].(*Item)
 | 
			
		||||
 | 
			
		||||
	if subitem.properties["name"][0].(string) != "Jazz Band" {
 | 
			
		||||
		t.Errorf("Property value 'Jazz Band' not found for 'name'")
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestParseEmbeddedItemWithItemRef(t *testing.T) {
 | 
			
		||||
	html := `<body>
 | 
			
		||||
			<div itemscope id="amanda" itemref="a b"></div>
 | 
			
		||||
		<p id="a">Name: <span itemprop="name">Amanda</span></p>
 | 
			
		||||
		<div id="b" itemprop="band" itemscope itemref="c"></div>
 | 
			
		||||
		<div id="c">
 | 
			
		||||
		 <p>Band: <span itemprop="name">Jazz Band</span></p>
 | 
			
		||||
		 <p>Size: <span itemprop="size">12</span> players</p>
 | 
			
		||||
		</div></body>`
 | 
			
		||||
 | 
			
		||||
	data := ParseData(html, t)
 | 
			
		||||
 | 
			
		||||
	if len(data.items) != 1 {
 | 
			
		||||
		t.Errorf("Expecting 1 item but got %d", len(data.items))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
	if data.items[0].properties["name"][0].(string) != "Amanda" {
 | 
			
		||||
		t.Errorf("Property value 'Amanda' not found for 'name'")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	subitem := data.items[0].properties["band"][0].(*Item)
 | 
			
		||||
 | 
			
		||||
	if subitem.properties["name"][0].(string) != "Jazz Band" {
 | 
			
		||||
		t.Errorf("Property value 'Jazz Band' not found for 'name'")
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user