diff --git a/README.md b/README.md index 527bf8d..ff9c0fd 100644 --- a/README.md +++ b/README.md @@ -15,20 +15,65 @@ USAGE Example of parsing a string containing HTML: - include ( + package main + + import ( + "github.com/iand/microdata" "net/url" "strings" ) - html = `<div itemscope>
- <p>My name is <span itemprop="name">Elizabeth</span>.</p>
- </div>
` - baseUrl, _ := url.Parse("http://example.com/") - p := NewParser(strings.NewReader(html), baseUrl) + func main() { + html := `<div itemscope>
+ <p>My name is <span itemprop="name">Elizabeth</span>.</p>
+ </div>
` - data, err := p.Parse() - if err != nil { - t.Errorf("Expected no error but got %d", err) - } + baseUrl, _ := url.Parse("http://example.com/") + p := microdata.NewParser(strings.NewReader(html), baseUrl) - println("Name: ", data.items[0].properties["name"][0] \ No newline at end of file + data, err := p.Parse() + if err != nil { + panic(err) + } + + println("Name: ", data.Items[0].Properties["name"][0].(string)) + } + +Extract microdata from a webpage and print the result as JSON: + + package main + + import ( + "bytes" + "github.com/iand/microdata" + "io/ioutil" + "net/http" + "net/url" + "os" + ) + + func main() { + + baseUrl, _ := url.Parse("http://tagger.steve.museum/steve/object/44863?offset=6") + + resp, _ := http.Get(baseUrl.String()) + defer resp.Body.Close() + + html, _ := ioutil.ReadAll(resp.Body) + + p := microdata.NewParser(bytes.NewReader(html), baseUrl) + + data, _ := p.Parse() + + json, _ := data.Json() + os.Stdout.Write(json) + } + + +LICENSE +======= +This code and associated documentation is in the public domain. + +To the extent possible under law, Ian Davis has waived all copyright +and related or neighboring rights to this file. This work is published +from the United Kingdom. 
diff --git a/microdata.go b/microdata.go index b98ab7b..67afd60 100644 --- a/microdata.go +++ b/microdata.go @@ -3,6 +3,7 @@ package microdata import ( "bytes" "code.google.com/p/go-html-transform/h5" + "encoding/json" "io" "net/url" "strings" @@ -12,9 +13,9 @@ type ValueList []interface{} type PropertyMap map[string]ValueList type Item struct { - Properties PropertyMap - Types []string - ID string + Properties PropertyMap `json:"properties"` + Types []string `json:"type,omitempty"` + ID string `json:"id,omitempty"` } func NewItem() *Item { @@ -32,9 +33,12 @@ func (self *Item) SetItem(property string, value *Item) { self.Properties[property] = append(self.Properties[property], value) } +func (self *Item) AddType(value string) { + self.Types = append(self.Types, value) +} type Microdata struct { - Items []*Item + Items []*Item `json:"items"` } func NewMicrodata() *Microdata { @@ -43,10 +47,22 @@ func NewMicrodata() *Microdata { } } +func (self *Microdata) AddItem(value *Item) { + self.Items = append(self.Items, value) +} + +func (self *Microdata) Json() ([]byte, error) { + b, err := json.Marshal(self) + if err != nil { + return nil, err + } + return b, nil +} + type Parser struct { p *h5.Parser data *Microdata - base *url.URL + base *url.URL identifiedNodes map[string]*h5.Node } @@ -198,7 +214,6 @@ func (self *Parser) readItem(item *Item, node *h5.Node) { } - } if len(node.Children) > 0 { diff --git a/microdata_test.go b/microdata_test.go index 172d3af..428d830 100644 --- a/microdata_test.go +++ b/microdata_test.go @@ -1,6 +1,7 @@ package microdata import ( + "bytes" "net/url" "strings" "testing" @@ -449,7 +450,6 @@ func TestParseMultiValuedItemRef(t *testing.T) { } } - func TestParseEmbeddedItem(t *testing.T) { html := `

 <p>Name: <span itemprop="name">Amanda</span></p>

@@ -462,7 +462,6 @@ func TestParseEmbeddedItem(t *testing.T) { t.Errorf("Expecting 1 item but got %d", len(data.Items)) } - if data.Items[0].Properties["name"][0].(string) != "Amanda" { t.Errorf("Property value 'Amanda' not found for 'name'") } @@ -490,7 +489,6 @@ func TestParseEmbeddedItemWithItemRef(t *testing.T) { t.Errorf("Expecting 1 item but got %d", len(data.Items)) } - if data.Items[0].Properties["name"][0].(string) != "Amanda" { t.Errorf("Property value 'Amanda' not found for 'name'") } @@ -502,8 +500,6 @@ func TestParseEmbeddedItemWithItemRef(t *testing.T) { } } - - func TestParseRelativeURL(t *testing.T) { html := `
@@ -535,3 +531,36 @@ func TestParseItemRelativeId(t *testing.T) { t.Errorf("Expecting id of 'http://example.com/foo' but got %d", item.ID) } } + +func TestJson(t *testing.T) { + item := NewItem() + item.SetString("name", "Elizabeth") + + data := NewMicrodata() + data.AddItem(item) + + expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]}}]}`) + + actual, _ := data.Json() + + if !bytes.Equal(actual, expected) { + t.Errorf("Expecting %s but got %s", expected, actual) + } +} + +func TestJsonWithType(t *testing.T) { + item := NewItem() + item.AddType("http://example.org/animals#cat") + item.SetString("name", "Elizabeth") + + data := NewMicrodata() + data.AddItem(item) + + expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]},"type":["http://example.org/animals#cat"]}]}`) + + actual, _ := data.Json() + + if !bytes.Equal(actual, expected) { + t.Errorf("Expecting %s but got %s", expected, actual) + } +}