diff --git a/README.md b/README.md index eb188eb..c891132 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # microdata + A microdata parser in Go See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata @@ -9,79 +10,79 @@ See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more Simply run - go get github.com/iand/microdata + go get github.com/iand/microdata Documentation is at [http://godoc.org/github.com/iand/microdata](http://godoc.org/github.com/iand/microdata) - ## Usage Example of parsing a string containing HTML: - package main +```go +package main - import ( - "github.com/iand/microdata" - "net/url" - "strings" - ) +import ( + "github.com/iand/microdata" + "net/url" + "strings" +) - func main() { - html := `
<div itemscope>
-         <p>My name is <span itemprop="name">Elizabeth</span>.</p>
-        </div>
` +func main() { + html := `
<div itemscope>
+	 <p>My name is <span itemprop="name">Elizabeth</span>.</p>
+	</div>
` - baseUrl, _ := url.Parse("http://example.com/") - p := microdata.NewParser(strings.NewReader(html), baseUrl) + baseUrl, _ := url.Parse("http://example.com/") + p := microdata.NewParser(strings.NewReader(html), baseUrl) - data, err := p.Parse() - if err != nil { - panic(err) - } + data, err := p.Parse() + if err != nil { + panic(err) + } - println("Name: ", data.Items[0].Properties["name"][0].(string)) - } + println("Name: ", data.Items[0].Properties["name"][0].(string)) +} +``` Extract microdata from a webpage and print the result as JSON - package main +```go +package main - import ( - "bytes" - "io/ioutil" - "net/http" - "net/url" - "os" +import ( + "bytes" + "io/ioutil" + "net/http" + "net/url" + "os" - "github.com/iand/microdata" - ) + "github.com/iand/microdata" +) - func main() { +func main() { - baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata") + baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata") - resp, _ := http.Get(baseUrl.String()) - defer resp.Body.Close() + resp, _ := http.Get(baseUrl.String()) + defer resp.Body.Close() - html, _ := ioutil.ReadAll(resp.Body) + html, _ := ioutil.ReadAll(resp.Body) - p := microdata.NewParser(bytes.NewReader(html), baseUrl) + p := microdata.NewParser(bytes.NewReader(html), baseUrl) - data, _ := p.Parse() - - json, _ := data.JSON() - os.Stdout.Write(json) - } + data, _ := p.Parse() + json, _ := data.JSON() + os.Stdout.Write(json) +} +``` ## Authors * [Ian Davis](http://github.com/iand) - - ## Contributors - ## Contributing * Do submit your changes as a pull request diff --git a/microdata.go b/microdata.go index 8198518..3130fb3 100644 --- a/microdata.go +++ b/microdata.go @@ -18,12 +18,12 @@ import ( "golang.org/x/net/html/atom" ) -type ValueList []interface{} -type PropertyMap map[string]ValueList +type valueList []interface{} +type propertyMap map[string]valueList // Item represents a microdata item type Item struct { - Properties PropertyMap 
`json:"properties"` + Properties propertyMap `json:"properties"` Types []string `json:"type,omitempty"` ID string `json:"id,omitempty"` } @@ -31,7 +31,7 @@ type Item struct { // NewItem creates a new microdata item func NewItem() *Item { return &Item{ - Properties: make(PropertyMap, 0), + Properties: make(propertyMap, 0), Types: make([]string, 0), } } @@ -132,11 +132,10 @@ func (p *Parser) Parse() (*Microdata, error) { } // itemid only valid when itemscope and itemtype are both present if itemid, exists := getAttr("itemid", node); exists { - if parsedUrl, err := p.base.Parse(itemid); err == nil { - item.ID = parsedUrl.String() + if parsedURL, err := p.base.Parse(itemid); err == nil { + item.ID = parsedURL.String() } } - } if itemrefs, exists := getAttr("itemref", node); exists { @@ -168,7 +167,9 @@ func (p *Parser) readItem(item *Item, node *html.Node) { itemref = strings.TrimSpace(itemref) if refnode, exists := p.identifiedNodes[itemref]; exists { - p.readItem(subitem, refnode) + if refnode != node { + p.readItem(subitem, refnode) + } } } } @@ -198,15 +199,14 @@ func (p *Parser) readItem(item *Item, node *html.Node) { } case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video: if urlValue, exists := getAttr("src", node); exists { - if parsedUrl, err := p.base.Parse(urlValue); err == nil { - propertyValue = parsedUrl.String() + if parsedURL, err := p.base.Parse(urlValue); err == nil { + propertyValue = parsedURL.String() } - } case atom.A, atom.Area, atom.Link: if urlValue, exists := getAttr("href", node); exists { - if parsedUrl, err := p.base.Parse(urlValue); err == nil { - propertyValue = parsedUrl.String() + if parsedURL, err := p.base.Parse(urlValue); err == nil { + propertyValue = parsedURL.String() } } case atom.Object: diff --git a/microdata_test.go b/microdata_test.go index c57ca09..01bdc97 100644 --- a/microdata_test.go +++ b/microdata_test.go @@ -8,6 +8,7 @@ package microdata import ( "bytes" "net/url" + "reflect" "strings" 
"testing" ) @@ -569,3 +570,30 @@ func TestJsonWithType(t *testing.T) { t.Errorf("Expecting %s but got %s", expected, actual) } } + +// This test checks stack overflow doesn't happen as mentioned in +// https://github.com/iand/microdata/issues/3 +func TestSkipSelfReferencingItemref(t *testing.T) { + html := ` + + + + ` + + actual := ParseData(html, t) + + child := NewItem() + child.AddString("title", "Foo") + child.AddString("url", "http://example.com/foo/bar") + + item := NewItem() + item.AddType("http://schema.org/WebPage") + item.AddItem("child", child) + + expected := NewMicrodata() + expected.AddItem(item) + + if !reflect.DeepEqual(expected, actual) { + t.Errorf("Expecting %s but got %s", expected, actual) + } +}