Added examples to README
parent
ffada1099e
commit
0b1fbba09d
65
README.md
65
README.md
|
@ -15,20 +15,65 @@ USAGE
|
|||
|
||||
Example of parsing a string containing HTML:
|
||||
|
||||
include (
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/iand/microdata"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
html = `<div itemscope>
|
||||
<p>My name is <span itemprop="name">Elizabeth</span>.</p>
|
||||
</div>`
|
||||
|
||||
baseUrl, _ := url.Parse("http://example.com/")
|
||||
p := NewParser(strings.NewReader(html), baseUrl)
|
||||
func main() {
|
||||
html := `<div itemscope>
|
||||
<p>My name is <span itemprop="name">Elizabeth</span>.</p>
|
||||
</div>`
|
||||
|
||||
data, err := p.Parse()
|
||||
if err != nil {
|
||||
t.Errorf("Expected no error but got %d", err)
|
||||
baseUrl, _ := url.Parse("http://example.com/")
|
||||
p := microdata.NewParser(strings.NewReader(html), baseUrl)
|
||||
|
||||
data, err := p.Parse()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
println("Name: ", data.Items[0].Properties["name"][0].(string))
|
||||
}
|
||||
|
||||
println("Name: ", data.items[0].properties["name"][0]
|
||||
Extract microdata from a webpage and print the result as JSON:
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/iand/microdata"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
baseUrl, _ := url.Parse("http://tagger.steve.museum/steve/object/44863?offset=6")
|
||||
|
||||
resp, _ := http.Get(baseUrl.String())
|
||||
defer resp.Body.Close()
|
||||
|
||||
html, _ := ioutil.ReadAll(resp.Body)
|
||||
|
||||
p := microdata.NewParser(bytes.NewReader(html), baseUrl)
|
||||
|
||||
data, _ := p.Parse()
|
||||
|
||||
json, _ := data.Json()
|
||||
os.Stdout.Write(json)
|
||||
}
|
||||
|
||||
|
||||
LICENSE
|
||||
=======
|
||||
This code and associated documentation is in the public domain.
|
||||
|
||||
To the extent possible under law, Ian Davis has waived all copyright
|
||||
and related or neighboring rights to this file. This work is published
|
||||
from the United Kingdom.
|
||||
|
|
27
microdata.go
27
microdata.go
|
@ -3,6 +3,7 @@ package microdata
|
|||
import (
|
||||
"bytes"
|
||||
"code.google.com/p/go-html-transform/h5"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
@ -12,9 +13,9 @@ type ValueList []interface{}
|
|||
type PropertyMap map[string]ValueList
|
||||
|
||||
type Item struct {
|
||||
Properties PropertyMap
|
||||
Types []string
|
||||
ID string
|
||||
Properties PropertyMap `json:"properties"`
|
||||
Types []string `json:"type,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
}
|
||||
|
||||
func NewItem() *Item {
|
||||
|
@ -32,9 +33,12 @@ func (self *Item) SetItem(property string, value *Item) {
|
|||
self.Properties[property] = append(self.Properties[property], value)
|
||||
}
|
||||
|
||||
// AddType appends value to the item's Types slice.
func (self *Item) AddType(value string) {
|
||||
self.Types = append(self.Types, value)
|
||||
}
|
||||
|
||||
type Microdata struct {
|
||||
Items []*Item
|
||||
Items []*Item `json:"items"`
|
||||
}
|
||||
|
||||
func NewMicrodata() *Microdata {
|
||||
|
@ -43,10 +47,22 @@ func NewMicrodata() *Microdata {
|
|||
}
|
||||
}
|
||||
|
||||
// AddItem appends value to the Items slice of the receiver.
func (self *Microdata) AddItem(value *Item) {
|
||||
self.Items = append(self.Items, value)
|
||||
}
|
||||
|
||||
func (self *Microdata) Json() ([]byte, error) {
|
||||
b, err := json.Marshal(self)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
type Parser struct {
|
||||
p *h5.Parser
|
||||
data *Microdata
|
||||
base *url.URL
|
||||
base *url.URL
|
||||
identifiedNodes map[string]*h5.Node
|
||||
}
|
||||
|
||||
|
@ -198,7 +214,6 @@ func (self *Parser) readItem(item *Item, node *h5.Node) {
|
|||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
if len(node.Children) > 0 {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package microdata
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -449,7 +450,6 @@ func TestParseMultiValuedItemRef(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
func TestParseEmbeddedItem(t *testing.T) {
|
||||
html := `<div itemscope>
|
||||
<p>Name: <span itemprop="name">Amanda</span></p>
|
||||
|
@ -462,7 +462,6 @@ func TestParseEmbeddedItem(t *testing.T) {
|
|||
t.Errorf("Expecting 1 item but got %d", len(data.Items))
|
||||
}
|
||||
|
||||
|
||||
if data.Items[0].Properties["name"][0].(string) != "Amanda" {
|
||||
t.Errorf("Property value 'Amanda' not found for 'name'")
|
||||
}
|
||||
|
@ -490,7 +489,6 @@ func TestParseEmbeddedItemWithItemRef(t *testing.T) {
|
|||
t.Errorf("Expecting 1 item but got %d", len(data.Items))
|
||||
}
|
||||
|
||||
|
||||
if data.Items[0].Properties["name"][0].(string) != "Amanda" {
|
||||
t.Errorf("Property value 'Amanda' not found for 'name'")
|
||||
}
|
||||
|
@ -502,8 +500,6 @@ func TestParseEmbeddedItemWithItemRef(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
func TestParseRelativeURL(t *testing.T) {
|
||||
html := `
|
||||
<div itemscope>
|
||||
|
@ -535,3 +531,36 @@ func TestParseItemRelativeId(t *testing.T) {
|
|||
t.Errorf("Expecting id of 'http://example.com/foo' but got %d", item.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestJson(t *testing.T) {
|
||||
item := NewItem()
|
||||
item.SetString("name", "Elizabeth")
|
||||
|
||||
data := NewMicrodata()
|
||||
data.AddItem(item)
|
||||
|
||||
expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]}}]}`)
|
||||
|
||||
actual, _ := data.Json()
|
||||
|
||||
if !bytes.Equal(actual, expected) {
|
||||
t.Errorf("Expecting %s but got %s", expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
func TestJsonWithType(t *testing.T) {
|
||||
item := NewItem()
|
||||
item.AddType("http://example.org/animals#cat")
|
||||
item.SetString("name", "Elizabeth")
|
||||
|
||||
data := NewMicrodata()
|
||||
data.AddItem(item)
|
||||
|
||||
expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]},"type":["http://example.org/animals#cat"]}]}`)
|
||||
|
||||
actual, _ := data.Json()
|
||||
|
||||
if !bytes.Equal(actual, expected) {
|
||||
t.Errorf("Expecting %s but got %s", expected, actual)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue