Merge pull request #4 from mcnijman/master
Fix #3: fatal stack overflow
This commit is contained in:
		
						commit
						f416fa49b8
					
				
							
								
								
									
										85
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										85
									
								
								README.md
									
									
									
									
									
								
							@ -1,4 +1,5 @@
 | 
			
		||||
# microdata
 | 
			
		||||
 | 
			
		||||
A microdata parser in Go
 | 
			
		||||
 | 
			
		||||
See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata
 | 
			
		||||
@ -9,79 +10,79 @@ See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more
 | 
			
		||||
 | 
			
		||||
Simply run
 | 
			
		||||
 | 
			
		||||
	go get github.com/iand/microdata
 | 
			
		||||
    go get github.com/iand/microdata
 | 
			
		||||
 | 
			
		||||
Documentation is at [http://godoc.org/github.com/iand/microdata](http://godoc.org/github.com/iand/microdata)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Usage
 | 
			
		||||
 | 
			
		||||
Example of parsing a string containing HTML:
 | 
			
		||||
 | 
			
		||||
	package main
 | 
			
		||||
```go
 | 
			
		||||
package main
 | 
			
		||||
 | 
			
		||||
	import (
 | 
			
		||||
		"github.com/iand/microdata"
 | 
			
		||||
		"net/url"
 | 
			
		||||
		"strings"
 | 
			
		||||
	)
 | 
			
		||||
import (
 | 
			
		||||
    "github.com/iand/microdata"
 | 
			
		||||
    "net/url"
 | 
			
		||||
    "strings"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
	func main() {
 | 
			
		||||
		html := `<div itemscope>
 | 
			
		||||
		 <p>My name is <span itemprop="name">Elizabeth</span>.</p>
 | 
			
		||||
		</div>`
 | 
			
		||||
func main() {
 | 
			
		||||
    html := `<div itemscope>
 | 
			
		||||
        <p>My name is <span itemprop="name">Elizabeth</span>.</p>
 | 
			
		||||
    </div>`
 | 
			
		||||
 | 
			
		||||
		baseUrl, _ := url.Parse("http://example.com/")
 | 
			
		||||
		p := microdata.NewParser(strings.NewReader(html), baseUrl)
 | 
			
		||||
    baseUrl, _ := url.Parse("http://example.com/")
 | 
			
		||||
    p := microdata.NewParser(strings.NewReader(html), baseUrl)
 | 
			
		||||
 | 
			
		||||
		data, err := p.Parse()
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			panic(err)
 | 
			
		||||
		}
 | 
			
		||||
    data, err := p.Parse()
 | 
			
		||||
    if err != nil {
 | 
			
		||||
        panic(err)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
		println("Name: ", data.Items[0].Properties["name"][0].(string))
 | 
			
		||||
	}
 | 
			
		||||
    println("Name: ", data.Items[0].Properties["name"][0].(string))
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Extract microdata from a webpage and print the result as JSON:
 | 
			
		||||
 | 
			
		||||
	package main
 | 
			
		||||
```go
 | 
			
		||||
package main
 | 
			
		||||
 | 
			
		||||
	import (
 | 
			
		||||
	    "bytes"
 | 
			
		||||
	    "io/ioutil"
 | 
			
		||||
	    "net/http"
 | 
			
		||||
	    "net/url"
 | 
			
		||||
	    "os"
 | 
			
		||||
import (
 | 
			
		||||
    "bytes"
 | 
			
		||||
    "io/ioutil"
 | 
			
		||||
    "net/http"
 | 
			
		||||
    "net/url"
 | 
			
		||||
    "os"
 | 
			
		||||
 | 
			
		||||
	    "github.com/iand/microdata"
 | 
			
		||||
	)
 | 
			
		||||
    "github.com/iand/microdata"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
	func main() {
 | 
			
		||||
func main() {
 | 
			
		||||
 | 
			
		||||
	    baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
 | 
			
		||||
    baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
 | 
			
		||||
 | 
			
		||||
	    resp, _ := http.Get(baseUrl.String())
 | 
			
		||||
	    defer resp.Body.Close()
 | 
			
		||||
    resp, _ := http.Get(baseUrl.String())
 | 
			
		||||
    defer resp.Body.Close()
 | 
			
		||||
 | 
			
		||||
	    html, _ := ioutil.ReadAll(resp.Body)
 | 
			
		||||
    html, _ := ioutil.ReadAll(resp.Body)
 | 
			
		||||
 | 
			
		||||
	    p := microdata.NewParser(bytes.NewReader(html), baseUrl)
 | 
			
		||||
    p := microdata.NewParser(bytes.NewReader(html), baseUrl)
 | 
			
		||||
 | 
			
		||||
	    data, _ := p.Parse()
 | 
			
		||||
 | 
			
		||||
	    json, _ := data.JSON()
 | 
			
		||||
	    os.Stdout.Write(json)
 | 
			
		||||
	}
 | 
			
		||||
    data, _ := p.Parse()
 | 
			
		||||
 | 
			
		||||
    json, _ := data.JSON()
 | 
			
		||||
    os.Stdout.Write(json)
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Authors
 | 
			
		||||
 | 
			
		||||
* [Ian Davis](http://github.com/iand) - <http://iandavis.com/>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Contributors
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Contributing
 | 
			
		||||
 | 
			
		||||
* Do submit your changes as a pull request
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										26
									
								
								microdata.go
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								microdata.go
									
									
									
									
									
								
							@ -18,12 +18,12 @@ import (
 | 
			
		||||
	"golang.org/x/net/html/atom"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type ValueList []interface{}
 | 
			
		||||
type PropertyMap map[string]ValueList
 | 
			
		||||
type valueList []interface{}
 | 
			
		||||
type propertyMap map[string]valueList
 | 
			
		||||
 | 
			
		||||
// Item represents a microdata item
 | 
			
		||||
type Item struct {
 | 
			
		||||
	Properties PropertyMap `json:"properties"`
 | 
			
		||||
	Properties propertyMap `json:"properties"`
 | 
			
		||||
	Types      []string    `json:"type,omitempty"`
 | 
			
		||||
	ID         string      `json:"id,omitempty"`
 | 
			
		||||
}
 | 
			
		||||
@ -31,7 +31,7 @@ type Item struct {
 | 
			
		||||
// NewItem creates a new microdata item
 | 
			
		||||
func NewItem() *Item {
 | 
			
		||||
	return &Item{
 | 
			
		||||
		Properties: make(PropertyMap, 0),
 | 
			
		||||
		Properties: make(propertyMap, 0),
 | 
			
		||||
		Types:      make([]string, 0),
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
@ -132,11 +132,10 @@ func (p *Parser) Parse() (*Microdata, error) {
 | 
			
		||||
			}
 | 
			
		||||
			// itemid only valid when itemscope and itemtype are both present
 | 
			
		||||
			if itemid, exists := getAttr("itemid", node); exists {
 | 
			
		||||
				if parsedUrl, err := p.base.Parse(itemid); err == nil {
 | 
			
		||||
					item.ID = parsedUrl.String()
 | 
			
		||||
				if parsedURL, err := p.base.Parse(itemid); err == nil {
 | 
			
		||||
					item.ID = parsedURL.String()
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if itemrefs, exists := getAttr("itemref", node); exists {
 | 
			
		||||
@ -168,7 +167,9 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
 | 
			
		||||
					itemref = strings.TrimSpace(itemref)
 | 
			
		||||
 | 
			
		||||
					if refnode, exists := p.identifiedNodes[itemref]; exists {
 | 
			
		||||
						p.readItem(subitem, refnode)
 | 
			
		||||
						if refnode != node {
 | 
			
		||||
							p.readItem(subitem, refnode)
 | 
			
		||||
						}
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
@ -198,15 +199,14 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
 | 
			
		||||
			}
 | 
			
		||||
		case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
 | 
			
		||||
			if urlValue, exists := getAttr("src", node); exists {
 | 
			
		||||
				if parsedUrl, err := p.base.Parse(urlValue); err == nil {
 | 
			
		||||
					propertyValue = parsedUrl.String()
 | 
			
		||||
				if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
			
		||||
					propertyValue = parsedURL.String()
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
			}
 | 
			
		||||
		case atom.A, atom.Area, atom.Link:
 | 
			
		||||
			if urlValue, exists := getAttr("href", node); exists {
 | 
			
		||||
				if parsedUrl, err := p.base.Parse(urlValue); err == nil {
 | 
			
		||||
					propertyValue = parsedUrl.String()
 | 
			
		||||
				if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
			
		||||
					propertyValue = parsedURL.String()
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		case atom.Object:
 | 
			
		||||
 | 
			
		||||
@ -8,6 +8,7 @@ package microdata
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"net/url"
 | 
			
		||||
	"reflect"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"testing"
 | 
			
		||||
)
 | 
			
		||||
@ -569,3 +570,30 @@ func TestJsonWithType(t *testing.T) {
 | 
			
		||||
		t.Errorf("Expecting %s but got %s", expected, actual)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// This test checks that a self-referencing itemref does not cause a stack overflow, as reported in
 | 
			
		||||
// https://github.com/iand/microdata/issues/3
 | 
			
		||||
func TestSkipSelfReferencingItemref(t *testing.T) {
 | 
			
		||||
	html := `<body itemscope itemtype="http://schema.org/WebPage">
 | 
			
		||||
	  <span id="1" itemscope itemtype="http://data-vocabulary.org/Breadcrumb" itemprop="child" itemref="1">
 | 
			
		||||
	    <a title="Foo" itemprop="url" href="/foo/bar"><span itemprop="title">Foo</span></a>
 | 
			
		||||
	  </span>
 | 
			
		||||
	</body>`
 | 
			
		||||
 | 
			
		||||
	actual := ParseData(html, t)
 | 
			
		||||
 | 
			
		||||
	child := NewItem()
 | 
			
		||||
	child.AddString("title", "Foo")
 | 
			
		||||
	child.AddString("url", "http://example.com/foo/bar")
 | 
			
		||||
 | 
			
		||||
	item := NewItem()
 | 
			
		||||
	item.AddType("http://schema.org/WebPage")
 | 
			
		||||
	item.AddItem("child", child)
 | 
			
		||||
 | 
			
		||||
	expected := NewMicrodata()
 | 
			
		||||
	expected.AddItem(item)
 | 
			
		||||
 | 
			
		||||
	if !reflect.DeepEqual(expected, actual) {
 | 
			
		||||
		t.Errorf("Expecting %s but got %s", expected, actual)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user