Added subitems

pull/2/head
Ian Davis 2012-06-10 16:22:51 +01:00
parent a4244ac31d
commit cc14cf6b0c
2 changed files with 99 additions and 31 deletions

View File

@ -27,6 +27,11 @@ func (self *Item) SetString(property string, value string) {
self.properties[property] = append(self.properties[property], value)
}
// SetItem appends a nested item to the list of values stored under the
// given property name, leaving any values already recorded for that
// property in place.
func (self *Item) SetItem(property string, value *Item) {
	existing := self.properties[property]
	existing = append(existing, value)
	self.properties[property] = existing
}
// Microdata is the result type returned by Parser.Parse. It holds the
// list of items gathered during parsing.
type Microdata struct {
// items holds pointers to the collected Item values.
items []*Item
}
@ -111,44 +116,78 @@ func (self *Parser) Parse() (*Microdata, error) {
func (self *Parser) readItem(item *Item, node *h5.Node) {
if itemprop, exists := getAttr("itemprop", node); exists {
var propertyValue string
if _, exists := getAttr("itemscope", node); exists {
subitem := NewItem()
switch node.Data() {
if itemrefs, exists := getAttr("itemref", node); exists {
for _, itemref := range strings.Split(strings.TrimSpace(itemrefs), " ") {
itemref = strings.TrimSpace(itemref)
case "img", "audio", "source", "video", "embed", "iframe", "track":
if urlValue, exists := getAttr("src", node); exists {
propertyValue = urlValue
}
case "a", "area", "link":
if urlValue, exists := getAttr("href", node); exists {
propertyValue = urlValue
}
case "data":
if urlValue, exists := getAttr("value", node); exists {
propertyValue = urlValue
}
case "time":
if urlValue, exists := getAttr("datetime", node); exists {
propertyValue = urlValue
if refnode, exists := self.identifiedNodes[itemref]; exists {
self.readItem(subitem, refnode)
}
}
}
default:
var text bytes.Buffer
node.Walk(func(n *h5.Node) {
if n.Type == h5.TextNode {
text.WriteString(n.Data())
if len(node.Children) > 0 {
for _, child := range node.Children {
self.readItem(subitem, child)
}
}
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
propertyName = strings.TrimSpace(propertyName)
if propertyName != "" {
item.SetItem(propertyName, subitem)
}
}
return
} else {
var propertyValue string
switch node.Data() {
case "img", "audio", "source", "video", "embed", "iframe", "track":
if urlValue, exists := getAttr("src", node); exists {
propertyValue = urlValue
}
case "a", "area", "link":
if urlValue, exists := getAttr("href", node); exists {
propertyValue = urlValue
}
case "data":
if urlValue, exists := getAttr("value", node); exists {
propertyValue = urlValue
}
case "time":
if urlValue, exists := getAttr("datetime", node); exists {
propertyValue = urlValue
}
})
propertyValue = text.String()
default:
var text bytes.Buffer
node.Walk(func(n *h5.Node) {
if n.Type == h5.TextNode {
text.WriteString(n.Data())
}
})
propertyValue = text.String()
}
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
propertyName = strings.TrimSpace(propertyName)
if propertyName != "" {
item.SetString(propertyName, propertyValue)
}
}
}
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
propertyName = strings.TrimSpace(propertyName)
if propertyName != "" {
item.SetString(propertyName, propertyValue)
}
}
}
if len(node.Children) > 0 {
@ -156,6 +195,7 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
self.readItem(item, child)
}
}
}
func getAttr(name string, node *h5.Node) (string, bool) {

View File

@ -465,7 +465,35 @@ func TestParseEmbeddedItem(t *testing.T) {
t.Errorf("Property value 'Amanda' not found for 'name'")
}
subitem := data.items[0].properties["band"][0].(Item)
subitem := data.items[0].properties["band"][0].(*Item)
if subitem.properties["name"][0].(string) != "Jazz Band" {
t.Errorf("Property value 'Jazz Band' not found for 'name'")
}
}
func TestParseEmbeddedItemWithItemRef(t *testing.T) {
html := `<body>
<div itemscope id="amanda" itemref="a b"></div>
<p id="a">Name: <span itemprop="name">Amanda</span></p>
<div id="b" itemprop="band" itemscope itemref="c"></div>
<div id="c">
<p>Band: <span itemprop="name">Jazz Band</span></p>
<p>Size: <span itemprop="size">12</span> players</p>
</div></body>`
data := ParseData(html, t)
if len(data.items) != 1 {
t.Errorf("Expecting 1 item but got %d", len(data.items))
}
if data.items[0].properties["name"][0].(string) != "Amanda" {
t.Errorf("Property value 'Amanda' not found for 'name'")
}
subitem := data.items[0].properties["band"][0].(*Item)
if subitem.properties["name"][0].(string) != "Jazz Band" {
t.Errorf("Property value 'Jazz Band' not found for 'name'")