forked from ukamnya/microdata_mirror
		
	Merge pull request #4 from mcnijman/master
Fix #3: fatal stack overflow
This commit is contained in:
		
						commit
						f416fa49b8
					
				
							
								
								
									
										29
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								README.md
									
									
									
									
									
								
							@ -1,4 +1,5 @@
 | 
				
			|||||||
# microdata
 | 
					# microdata
 | 
				
			||||||
 | 
					
 | 
				
			||||||
A microdata parser in Go
 | 
					A microdata parser in Go
 | 
				
			||||||
 | 
					
 | 
				
			||||||
See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata
 | 
					See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata
 | 
				
			||||||
@ -13,20 +14,20 @@ Simply run
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
Documentation is at [http://godoc.org/github.com/iand/microdata](http://godoc.org/github.com/iand/microdata)
 | 
					Documentation is at [http://godoc.org/github.com/iand/microdata](http://godoc.org/github.com/iand/microdata)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
## Usage
 | 
					## Usage
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Example of parsing a string containing HTML:
 | 
					Example of parsing a string containing HTML:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	package main
 | 
					```go
 | 
				
			||||||
 | 
					package main
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	import (
 | 
					import (
 | 
				
			||||||
    "github.com/iand/microdata"
 | 
					    "github.com/iand/microdata"
 | 
				
			||||||
    "net/url"
 | 
					    "net/url"
 | 
				
			||||||
    "strings"
 | 
					    "strings"
 | 
				
			||||||
	)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	func main() {
 | 
					func main() {
 | 
				
			||||||
    html := `<div itemscope>
 | 
					    html := `<div itemscope>
 | 
				
			||||||
        <p>My name is <span itemprop="name">Elizabeth</span>.</p>
 | 
					        <p>My name is <span itemprop="name">Elizabeth</span>.</p>
 | 
				
			||||||
    </div>`
 | 
					    </div>`
 | 
				
			||||||
@ -40,13 +41,15 @@ Example of parsing a string containing HTML:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    println("Name: ", data.Items[0].Properties["name"][0].(string))
 | 
					    println("Name: ", data.Items[0].Properties["name"][0].(string))
 | 
				
			||||||
	}
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Extract microdata from a webpage and print the result as JSON
 | 
					Extract microdata from a webpage and print the result as JSON
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	package main
 | 
					```go
 | 
				
			||||||
 | 
					package main
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	import (
 | 
					import (
 | 
				
			||||||
    "bytes"
 | 
					    "bytes"
 | 
				
			||||||
    "io/ioutil"
 | 
					    "io/ioutil"
 | 
				
			||||||
    "net/http"
 | 
					    "net/http"
 | 
				
			||||||
@ -54,9 +57,9 @@ Extract microdata from a webpage and print the result as JSON
 | 
				
			|||||||
    "os"
 | 
					    "os"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    "github.com/iand/microdata"
 | 
					    "github.com/iand/microdata"
 | 
				
			||||||
	)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	func main() {
 | 
					func main() {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
 | 
					    baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -71,17 +74,15 @@ Extract microdata from a webpage and print the result as JSON
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    json, _ := data.JSON()
 | 
					    json, _ := data.JSON()
 | 
				
			||||||
    os.Stdout.Write(json)
 | 
					    os.Stdout.Write(json)
 | 
				
			||||||
	}
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Authors
 | 
					## Authors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
* [Ian Davis](http://github.com/iand) - <http://iandavis.com/>
 | 
					* [Ian Davis](http://github.com/iand) - <http://iandavis.com/>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
## Contributors
 | 
					## Contributors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
## Contributing
 | 
					## Contributing
 | 
				
			||||||
 | 
					
 | 
				
			||||||
* Do submit your changes as a pull request
 | 
					* Do submit your changes as a pull request
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										24
									
								
								microdata.go
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								microdata.go
									
									
									
									
									
								
							@ -18,12 +18,12 @@ import (
 | 
				
			|||||||
	"golang.org/x/net/html/atom"
 | 
						"golang.org/x/net/html/atom"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type ValueList []interface{}
 | 
					type valueList []interface{}
 | 
				
			||||||
type PropertyMap map[string]ValueList
 | 
					type propertyMap map[string]valueList
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Item represents a microdata item
 | 
					// Item represents a microdata item
 | 
				
			||||||
type Item struct {
 | 
					type Item struct {
 | 
				
			||||||
	Properties PropertyMap `json:"properties"`
 | 
						Properties propertyMap `json:"properties"`
 | 
				
			||||||
	Types      []string    `json:"type,omitempty"`
 | 
						Types      []string    `json:"type,omitempty"`
 | 
				
			||||||
	ID         string      `json:"id,omitempty"`
 | 
						ID         string      `json:"id,omitempty"`
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -31,7 +31,7 @@ type Item struct {
 | 
				
			|||||||
// NewItem creates a new microdata item
 | 
					// NewItem creates a new microdata item
 | 
				
			||||||
func NewItem() *Item {
 | 
					func NewItem() *Item {
 | 
				
			||||||
	return &Item{
 | 
						return &Item{
 | 
				
			||||||
		Properties: make(PropertyMap, 0),
 | 
							Properties: make(propertyMap, 0),
 | 
				
			||||||
		Types:      make([]string, 0),
 | 
							Types:      make([]string, 0),
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -132,11 +132,10 @@ func (p *Parser) Parse() (*Microdata, error) {
 | 
				
			|||||||
			}
 | 
								}
 | 
				
			||||||
			// itemid only valid when itemscope and itemtype are both present
 | 
								// itemid only valid when itemscope and itemtype are both present
 | 
				
			||||||
			if itemid, exists := getAttr("itemid", node); exists {
 | 
								if itemid, exists := getAttr("itemid", node); exists {
 | 
				
			||||||
				if parsedUrl, err := p.base.Parse(itemid); err == nil {
 | 
									if parsedURL, err := p.base.Parse(itemid); err == nil {
 | 
				
			||||||
					item.ID = parsedUrl.String()
 | 
										item.ID = parsedURL.String()
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if itemrefs, exists := getAttr("itemref", node); exists {
 | 
							if itemrefs, exists := getAttr("itemref", node); exists {
 | 
				
			||||||
@ -168,10 +167,12 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
 | 
				
			|||||||
					itemref = strings.TrimSpace(itemref)
 | 
										itemref = strings.TrimSpace(itemref)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					if refnode, exists := p.identifiedNodes[itemref]; exists {
 | 
										if refnode, exists := p.identifiedNodes[itemref]; exists {
 | 
				
			||||||
 | 
											if refnode != node {
 | 
				
			||||||
							p.readItem(subitem, refnode)
 | 
												p.readItem(subitem, refnode)
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			for child := node.FirstChild; child != nil; {
 | 
								for child := node.FirstChild; child != nil; {
 | 
				
			||||||
				p.readItem(subitem, child)
 | 
									p.readItem(subitem, child)
 | 
				
			||||||
@ -198,15 +199,14 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
 | 
				
			|||||||
			}
 | 
								}
 | 
				
			||||||
		case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
 | 
							case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
 | 
				
			||||||
			if urlValue, exists := getAttr("src", node); exists {
 | 
								if urlValue, exists := getAttr("src", node); exists {
 | 
				
			||||||
				if parsedUrl, err := p.base.Parse(urlValue); err == nil {
 | 
									if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
				
			||||||
					propertyValue = parsedUrl.String()
 | 
										propertyValue = parsedURL.String()
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 | 
					 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		case atom.A, atom.Area, atom.Link:
 | 
							case atom.A, atom.Area, atom.Link:
 | 
				
			||||||
			if urlValue, exists := getAttr("href", node); exists {
 | 
								if urlValue, exists := getAttr("href", node); exists {
 | 
				
			||||||
				if parsedUrl, err := p.base.Parse(urlValue); err == nil {
 | 
									if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
				
			||||||
					propertyValue = parsedUrl.String()
 | 
										propertyValue = parsedURL.String()
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		case atom.Object:
 | 
							case atom.Object:
 | 
				
			||||||
 | 
				
			|||||||
@ -8,6 +8,7 @@ package microdata
 | 
				
			|||||||
import (
 | 
					import (
 | 
				
			||||||
	"bytes"
 | 
						"bytes"
 | 
				
			||||||
	"net/url"
 | 
						"net/url"
 | 
				
			||||||
 | 
						"reflect"
 | 
				
			||||||
	"strings"
 | 
						"strings"
 | 
				
			||||||
	"testing"
 | 
						"testing"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
@ -569,3 +570,30 @@ func TestJsonWithType(t *testing.T) {
 | 
				
			|||||||
		t.Errorf("Expecting %s but got %s", expected, actual)
 | 
							t.Errorf("Expecting %s but got %s", expected, actual)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// This test checks stack overflow doesn't happen as mentioned in
 | 
				
			||||||
 | 
					// https://github.com/iand/microdata/issues/3
 | 
				
			||||||
 | 
					func TestSkipSelfReferencingItemref(t *testing.T) {
 | 
				
			||||||
 | 
						html := `<body itemscope itemtype="http://schema.org/WebPage">
 | 
				
			||||||
 | 
						  <span id="1" itemscope itemtype="http://data-vocabulary.org/Breadcrumb" itemprop="child" itemref="1">
 | 
				
			||||||
 | 
						    <a title="Foo" itemprop="url" href="/foo/bar"><span itemprop="title">Foo</span></a>
 | 
				
			||||||
 | 
						  </span>
 | 
				
			||||||
 | 
						</body>`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						actual := ParseData(html, t)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						child := NewItem()
 | 
				
			||||||
 | 
						child.AddString("title", "Foo")
 | 
				
			||||||
 | 
						child.AddString("url", "http://example.com/foo/bar")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						item := NewItem()
 | 
				
			||||||
 | 
						item.AddType("http://schema.org/WebPage")
 | 
				
			||||||
 | 
						item.AddItem("child", child)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						expected := NewMicrodata()
 | 
				
			||||||
 | 
						expected.AddItem(item)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if !reflect.DeepEqual(expected, actual) {
 | 
				
			||||||
 | 
							t.Errorf("Expecting %s but got %s", expected, actual)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user