diff --git a/README.md b/README.md
index eb188eb..c891132 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
# microdata
+
A microdata parser in Go
See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata
@@ -9,79 +10,79 @@ See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more
Simply run
- go get github.com/iand/microdata
+ go get github.com/iand/microdata
Documentation is at [http://godoc.org/github.com/iand/microdata](http://godoc.org/github.com/iand/microdata)
-
## Usage
Example of parsing a string containing HTML:
- package main
+```go
+package main
- import (
- "github.com/iand/microdata"
- "net/url"
- "strings"
- )
+import (
+ "github.com/iand/microdata"
+ "net/url"
+ "strings"
+)
- func main() {
- html := `
-
My name is Elizabeth.
-
`
+func main() {
+ html := `
+
My name is Elizabeth.
+
`
- baseUrl, _ := url.Parse("http://example.com/")
- p := microdata.NewParser(strings.NewReader(html), baseUrl)
+ baseUrl, _ := url.Parse("http://example.com/")
+ p := microdata.NewParser(strings.NewReader(html), baseUrl)
- data, err := p.Parse()
- if err != nil {
- panic(err)
- }
+ data, err := p.Parse()
+ if err != nil {
+ panic(err)
+ }
- println("Name: ", data.Items[0].Properties["name"][0].(string))
- }
+ println("Name: ", data.Items[0].Properties["name"][0].(string))
+}
+```
Extract microdata from a webpage and print the result as JSON
- package main
+```go
+package main
- import (
- "bytes"
- "io/ioutil"
- "net/http"
- "net/url"
- "os"
+import (
+ "bytes"
+ "io/ioutil"
+ "net/http"
+ "net/url"
+ "os"
- "github.com/iand/microdata"
- )
+ "github.com/iand/microdata"
+)
- func main() {
+func main() {
- baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
+ baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
- resp, _ := http.Get(baseUrl.String())
- defer resp.Body.Close()
+ resp, _ := http.Get(baseUrl.String())
+ defer resp.Body.Close()
- html, _ := ioutil.ReadAll(resp.Body)
+ html, _ := ioutil.ReadAll(resp.Body)
- p := microdata.NewParser(bytes.NewReader(html), baseUrl)
+ p := microdata.NewParser(bytes.NewReader(html), baseUrl)
- data, _ := p.Parse()
-
- json, _ := data.JSON()
- os.Stdout.Write(json)
- }
+ data, _ := p.Parse()
+ json, _ := data.JSON()
+ os.Stdout.Write(json)
+}
+```
## Authors
* [Ian Davis](http://github.com/iand) -
-
## Contributors
-
## Contributing
* Do submit your changes as a pull request
diff --git a/microdata.go b/microdata.go
index 8198518..3130fb3 100644
--- a/microdata.go
+++ b/microdata.go
@@ -18,12 +18,12 @@ import (
"golang.org/x/net/html/atom"
)
-type ValueList []interface{}
-type PropertyMap map[string]ValueList
+type valueList []interface{}
+type propertyMap map[string]valueList
// Item represents a microdata item
type Item struct {
- Properties PropertyMap `json:"properties"`
+ Properties propertyMap `json:"properties"`
Types []string `json:"type,omitempty"`
ID string `json:"id,omitempty"`
}
@@ -31,7 +31,7 @@ type Item struct {
// NewItem creates a new microdata item
func NewItem() *Item {
return &Item{
- Properties: make(PropertyMap, 0),
+ Properties: make(propertyMap, 0),
Types: make([]string, 0),
}
}
@@ -132,11 +132,10 @@ func (p *Parser) Parse() (*Microdata, error) {
}
// itemid only valid when itemscope and itemtype are both present
if itemid, exists := getAttr("itemid", node); exists {
- if parsedUrl, err := p.base.Parse(itemid); err == nil {
- item.ID = parsedUrl.String()
+ if parsedURL, err := p.base.Parse(itemid); err == nil {
+ item.ID = parsedURL.String()
}
}
-
}
if itemrefs, exists := getAttr("itemref", node); exists {
@@ -168,7 +167,9 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
itemref = strings.TrimSpace(itemref)
if refnode, exists := p.identifiedNodes[itemref]; exists {
- p.readItem(subitem, refnode)
+ if refnode != node {
+ p.readItem(subitem, refnode)
+ }
}
}
}
@@ -198,15 +199,14 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
}
case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
if urlValue, exists := getAttr("src", node); exists {
- if parsedUrl, err := p.base.Parse(urlValue); err == nil {
- propertyValue = parsedUrl.String()
+ if parsedURL, err := p.base.Parse(urlValue); err == nil {
+ propertyValue = parsedURL.String()
}
-
}
case atom.A, atom.Area, atom.Link:
if urlValue, exists := getAttr("href", node); exists {
- if parsedUrl, err := p.base.Parse(urlValue); err == nil {
- propertyValue = parsedUrl.String()
+ if parsedURL, err := p.base.Parse(urlValue); err == nil {
+ propertyValue = parsedURL.String()
}
}
case atom.Object:
diff --git a/microdata_test.go b/microdata_test.go
index c57ca09..01bdc97 100644
--- a/microdata_test.go
+++ b/microdata_test.go
@@ -8,6 +8,7 @@ package microdata
import (
"bytes"
"net/url"
+ "reflect"
"strings"
"testing"
)
@@ -569,3 +570,30 @@ func TestJsonWithType(t *testing.T) {
t.Errorf("Expecting %s but got %s", expected, actual)
}
}
+
+// TestSkipSelfReferencingItemref checks that a self-referencing itemref does not cause a stack overflow, as reported in
+// https://github.com/iand/microdata/issues/3
+func TestSkipSelfReferencingItemref(t *testing.T) {
+ html := `
+
+ Foo
+
+ `
+
+ actual := ParseData(html, t)
+
+ child := NewItem()
+ child.AddString("title", "Foo")
+ child.AddString("url", "http://example.com/foo/bar")
+
+ item := NewItem()
+ item.AddType("http://schema.org/WebPage")
+ item.AddItem("child", child)
+
+ expected := NewMicrodata()
+ expected.AddItem(item)
+
+ if !reflect.DeepEqual(expected, actual) {
+ t.Errorf("Expecting %s but got %s", expected, actual)
+ }
+}