forked from ukamnya/microdata_mirror
		
	Added parsing of itemref
This commit is contained in:
		
							parent
							
								
									a58ae8f1fe
								
							
						
					
					
						commit
						aec670c37a
					
				
							
								
								
									
										36
									
								
								microdata.go
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								microdata.go
									
									
									
									
									
								
							| @ -42,6 +42,7 @@ func NewMicrodata() *Microdata { | ||||
| type Parser struct { | ||||
| 	p *h5.Parser | ||||
| 	data *Microdata | ||||
| 	identifiedNodes map[string]*h5.Node | ||||
| } | ||||
| 
 | ||||
| func NewParser(r io.Reader) *Parser { | ||||
| @ -58,17 +59,23 @@ func (self *Parser) Parse() (*Microdata, error) { | ||||
| 	} | ||||
| 	tree := self.p.Tree() | ||||
| 
 | ||||
| 	self.scanForItem(tree) | ||||
| 	topLevelItemNodes := make([]*h5.Node, 0) | ||||
| 	self.identifiedNodes = make(map[string]*h5.Node, 0) | ||||
| 
 | ||||
| 	return self.data, nil | ||||
| 
 | ||||
| 	tree.Walk( func(n *h5.Node) { | ||||
| 		if _, exists := getAttr("itemscope", n); exists { | ||||
| 			if _, exists := getAttr("itemprop", n); !exists { | ||||
| 				topLevelItemNodes = append(topLevelItemNodes, n) | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| func (self *Parser) scanForItem(node *h5.Node) { | ||||
| 	if node == nil { | ||||
| 		return | ||||
| 		if id, exists := getAttr("id", n); exists { | ||||
| 			self.identifiedNodes[id] = n | ||||
| 		} | ||||
| 		}) | ||||
| 
 | ||||
| 	if _, exists := getAttr("itemscope", node); exists { | ||||
| 	for _, node := range topLevelItemNodes { | ||||
| 		item := NewItem() | ||||
| 		self.data.items = append(self.data.items, item) | ||||
| 		if itemtypes, exists := getAttr("itemtype", node); exists { | ||||
| @ -85,23 +92,24 @@ func (self *Parser) scanForItem(node *h5.Node) { | ||||
| 			 | ||||
| 		}  | ||||
| 
 | ||||
| 		if itemref, exists := getAttr("itemref", node); exists { | ||||
| 			if refnode, exists := self.identifiedNodes[itemref]; exists { | ||||
| 	        	self.readItem(item, refnode) | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if len(node.Children) > 0 { | ||||
| 	    	for _, child := range node.Children { | ||||
| 	        	self.readItem(item, child) | ||||
| 	        } | ||||
| 	    } | ||||
| 
 | ||||
| 	} else { | ||||
| 		if len(node.Children) > 0 { | ||||
| 	    	for _, child := range node.Children { | ||||
| 	        	self.scanForItem(child) | ||||
| 	        } | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return self.data, nil | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| func (self *Parser) readItem(item *Item, node *h5.Node) { | ||||
| 	if itemprop, exists := getAttr("itemprop", node); exists { | ||||
| 		var propertyValue string | ||||
| @ -144,6 +152,8 @@ func (self *Parser) readItem(item *Item, node *h5.Node) { | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 	if len(node.Children) > 0 { | ||||
|     	for _, child := range node.Children { | ||||
|         	self.readItem(item, child) | ||||
|  | ||||
| @ -366,3 +366,72 @@ func TestParseItemId(t *testing.T) { | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| func TestParseItemRef(t *testing.T) { | ||||
| 	html := `<body><p><figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses"> | ||||
|    <img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest."> | ||||
|    <figcaption itemprop="title">The house I found.</figcaption> | ||||
|   </figure></p> | ||||
|    <p id="licenses">All images licensed under the <a itemprop="license" | ||||
|    href="http://www.opensource.org/licenses/mit-license.php">MIT | ||||
|    license</a>.</p></body>` | ||||
| 
 | ||||
| 	item := ParseOneItem(html, t) | ||||
| 
 | ||||
| 
 | ||||
| 	if len(item.properties) != 3 { | ||||
| 		t.Errorf("Expecting 3 properties but got %d",len(item.properties) ) | ||||
| 	} | ||||
| 
 | ||||
| 	if item.properties["license"][0].(string) != "http://www.opensource.org/licenses/mit-license.php" { | ||||
| 		t.Errorf("Property value 'http://www.opensource.org/licenses/mit-license.php' not found for 'license'") | ||||
| 	} | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| func TestParseSharedItemRef(t *testing.T) { | ||||
| 	html := `<!DOCTYPE HTML> | ||||
| 		<html> | ||||
| 		 <head> | ||||
| 		  <title>Photo gallery</title> | ||||
| 		 </head> | ||||
| 		 <body> | ||||
| 		  <h1>My photos</h1> | ||||
| 		  <figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses"> | ||||
| 		   <img itemprop="work" src="images/house.jpeg" alt="A white house, boarded up, sits in a forest."> | ||||
| 		   <figcaption itemprop="title">The house I found.</figcaption> | ||||
| 		  </figure> | ||||
| 		  <figure itemscope itemtype="http://n.whatwg.org/work" itemref="licenses"> | ||||
| 		   <img itemprop="work" src="images/mailbox.jpeg" alt="Outside the house is a mailbox. It has a leaflet inside."> | ||||
| 		   <figcaption itemprop="title">The mailbox.</figcaption> | ||||
| 		  </figure> | ||||
| 		  <footer> | ||||
| 		   <p id="licenses">All images licensed under the <a itemprop="license" | ||||
| 		   href="http://www.opensource.org/licenses/mit-license.php">MIT | ||||
| 		   license</a>.</p> | ||||
| 		  </footer> | ||||
| 		 </body> | ||||
| 		</html>` | ||||
| 
 | ||||
| 	data := ParseData(html, t) | ||||
| 
 | ||||
| 	if len(data.items) != 2 { | ||||
| 		t.Errorf("Expecting 2 items but got %d",len(data.items) ) | ||||
| 	} | ||||
| 	if len(data.items[0].properties) != 3 { | ||||
| 		t.Errorf("Expecting 3 properties but got %d",len(data.items[0].properties) ) | ||||
| 	} | ||||
| 	if len(data.items[1].properties) != 3 { | ||||
| 		t.Errorf("Expecting 3 properties but got %d",len(data.items[1].properties) ) | ||||
| 	} | ||||
| 
 | ||||
| 	if data.items[0].properties["license"][0].(string) != "http://www.opensource.org/licenses/mit-license.php" { | ||||
| 		t.Errorf("Property value 'http://www.opensource.org/licenses/mit-license.php' not found for 'license'") | ||||
| 	} | ||||
| 
 | ||||
| 	if data.items[1].properties["license"][0].(string) != "http://www.opensource.org/licenses/mit-license.php" { | ||||
| 		t.Errorf("Property value 'http://www.opensource.org/licenses/mit-license.php' not found for 'license'") | ||||
| 	} | ||||
| 
 | ||||
| } | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user