Added examples to README

master
Ian Davis 2012-06-10 19:49:15 +01:00
parent ffada1099e
commit 0b1fbba09d
3 changed files with 111 additions and 22 deletions

View File

@ -15,20 +15,65 @@ USAGE
Example of parsing a string containing HTML: Example of parsing a string containing HTML:
include ( package main
import (
"github.com/iand/microdata"
"net/url" "net/url"
"strings" "strings"
) )
html = `<div itemscope>
<p>My name is <span itemprop="name">Elizabeth</span>.</p>
</div>`
baseUrl, _ := url.Parse("http://example.com/") func main() {
p := NewParser(strings.NewReader(html), baseUrl) html := `<div itemscope>
<p>My name is <span itemprop="name">Elizabeth</span>.</p>
</div>`
data, err := p.Parse() baseUrl, _ := url.Parse("http://example.com/")
if err != nil { p := microdata.NewParser(strings.NewReader(html), baseUrl)
t.Errorf("Expected no error but got %d", err)
}
println("Name: ", data.items[0].properties["name"][0] data, err := p.Parse()
if err != nil {
panic(err)
}
println("Name: ", data.Items[0].Properties["name"][0].(string))
}
Example of extracting microdata from a webpage and printing the result as JSON:
// Example program: fetch a web page, extract its microdata and print the
// result as JSON on standard output.
package main

import (
	"bytes"
	"github.com/iand/microdata"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
)

func main() {
	baseUrl, err := url.Parse("http://tagger.steve.museum/steve/object/44863?offset=6")
	if err != nil {
		panic(err)
	}

	// Check the error before touching resp: on failure resp is nil and
	// deferring resp.Body.Close() would itself panic with a nil dereference.
	resp, err := http.Get(baseUrl.String())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	html, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	p := microdata.NewParser(bytes.NewReader(html), baseUrl)

	data, err := p.Parse()
	if err != nil {
		panic(err)
	}

	out, err := data.Json()
	if err != nil {
		panic(err)
	}

	if _, err := os.Stdout.Write(out); err != nil {
		panic(err)
	}
}
LICENSE
=======
This code and associated documentation is in the public domain.
To the extent possible under law, Ian Davis has waived all copyright
and related or neighboring rights to this file. This work is published
from the United Kingdom.

View File

@ -3,6 +3,7 @@ package microdata
import ( import (
"bytes" "bytes"
"code.google.com/p/go-html-transform/h5" "code.google.com/p/go-html-transform/h5"
"encoding/json"
"io" "io"
"net/url" "net/url"
"strings" "strings"
@ -12,9 +13,9 @@ type ValueList []interface{}
type PropertyMap map[string]ValueList type PropertyMap map[string]ValueList
type Item struct { type Item struct {
Properties PropertyMap Properties PropertyMap `json:"properties"`
Types []string Types []string `json:"type,omitempty"`
ID string ID string `json:"id,omitempty"`
} }
func NewItem() *Item { func NewItem() *Item {
@ -32,9 +33,12 @@ func (self *Item) SetItem(property string, value *Item) {
self.Properties[property] = append(self.Properties[property], value) self.Properties[property] = append(self.Properties[property], value)
} }
// AddType appends value to the end of the item's Types slice.
func (self *Item) AddType(value string) {
	types := self.Types
	self.Types = append(types, value)
}
type Microdata struct { type Microdata struct {
Items []*Item Items []*Item `json:"items"`
} }
func NewMicrodata() *Microdata { func NewMicrodata() *Microdata {
@ -43,10 +47,22 @@ func NewMicrodata() *Microdata {
} }
} }
// AddItem appends value to the end of the Items slice.
func (self *Microdata) AddItem(value *Item) {
	items := append(self.Items, value)
	self.Items = items
}
// Json returns the result of passing the receiver to json.Marshal: the
// encoded bytes on success, or a nil slice together with the marshalling
// error on failure.
func (self *Microdata) Json() ([]byte, error) {
	// json.Marshal already yields (nil, err) on failure, so its result can
	// be handed back unchanged.
	return json.Marshal(self)
}
type Parser struct { type Parser struct {
p *h5.Parser p *h5.Parser
data *Microdata data *Microdata
base *url.URL base *url.URL
identifiedNodes map[string]*h5.Node identifiedNodes map[string]*h5.Node
} }
@ -198,7 +214,6 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
} }
} }
if len(node.Children) > 0 { if len(node.Children) > 0 {

View File

@ -1,6 +1,7 @@
package microdata package microdata
import ( import (
"bytes"
"net/url" "net/url"
"strings" "strings"
"testing" "testing"
@ -449,7 +450,6 @@ func TestParseMultiValuedItemRef(t *testing.T) {
} }
} }
func TestParseEmbeddedItem(t *testing.T) { func TestParseEmbeddedItem(t *testing.T) {
html := `<div itemscope> html := `<div itemscope>
<p>Name: <span itemprop="name">Amanda</span></p> <p>Name: <span itemprop="name">Amanda</span></p>
@ -462,7 +462,6 @@ func TestParseEmbeddedItem(t *testing.T) {
t.Errorf("Expecting 1 item but got %d", len(data.Items)) t.Errorf("Expecting 1 item but got %d", len(data.Items))
} }
if data.Items[0].Properties["name"][0].(string) != "Amanda" { if data.Items[0].Properties["name"][0].(string) != "Amanda" {
t.Errorf("Property value 'Amanda' not found for 'name'") t.Errorf("Property value 'Amanda' not found for 'name'")
} }
@ -490,7 +489,6 @@ func TestParseEmbeddedItemWithItemRef(t *testing.T) {
t.Errorf("Expecting 1 item but got %d", len(data.Items)) t.Errorf("Expecting 1 item but got %d", len(data.Items))
} }
if data.Items[0].Properties["name"][0].(string) != "Amanda" { if data.Items[0].Properties["name"][0].(string) != "Amanda" {
t.Errorf("Property value 'Amanda' not found for 'name'") t.Errorf("Property value 'Amanda' not found for 'name'")
} }
@ -502,8 +500,6 @@ func TestParseEmbeddedItemWithItemRef(t *testing.T) {
} }
} }
func TestParseRelativeURL(t *testing.T) { func TestParseRelativeURL(t *testing.T) {
html := ` html := `
<div itemscope> <div itemscope>
@ -535,3 +531,36 @@ func TestParseItemRelativeId(t *testing.T) {
t.Errorf("Expecting id of 'http://example.com/foo' but got %d", item.ID) t.Errorf("Expecting id of 'http://example.com/foo' but got %d", item.ID)
} }
} }
// TestJson checks that a Microdata holding one item with a single string
// property and no type is serialized to the expected JSON document.
func TestJson(t *testing.T) {
	item := NewItem()
	item.SetString("name", "Elizabeth")

	data := NewMicrodata()
	data.AddItem(item)

	expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]}}]}`)

	// Fail on a marshalling error directly instead of letting it surface as
	// a confusing mismatch against an empty byte slice.
	actual, err := data.Json()
	if err != nil {
		t.Fatalf("Expected no error but got %v", err)
	}

	if !bytes.Equal(actual, expected) {
		t.Errorf("Expecting %s but got %s", expected, actual)
	}
}
// TestJsonWithType checks that an item's type, added through AddType, is
// emitted under the "type" key alongside its properties in the JSON output.
func TestJsonWithType(t *testing.T) {
	item := NewItem()
	item.AddType("http://example.org/animals#cat")
	item.SetString("name", "Elizabeth")

	data := NewMicrodata()
	data.AddItem(item)

	expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]},"type":["http://example.org/animals#cat"]}]}`)

	// Fail on a marshalling error directly instead of letting it surface as
	// a confusing mismatch against an empty byte slice.
	actual, err := data.Json()
	if err != nil {
		t.Fatalf("Expected no error but got %v", err)
	}

	if !bytes.Equal(actual, expected) {
		t.Errorf("Expecting %s but got %s", expected, actual)
	}
}