commit
f416fa49b8
85
README.md
85
README.md
|
@ -1,4 +1,5 @@
|
|||
# microdata
|
||||
|
||||
A microdata parser in Go
|
||||
|
||||
See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata.
|
||||
|
@ -9,79 +10,79 @@ See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more
|
|||
|
||||
Simply run
|
||||
|
||||
go get github.com/iand/microdata
|
||||
go get github.com/iand/microdata
|
||||
|
||||
Documentation is at [http://godoc.org/github.com/iand/microdata](http://godoc.org/github.com/iand/microdata)
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
Example of parsing a string containing HTML:
|
||||
|
||||
package main
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/iand/microdata"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
import (
|
||||
"github.com/iand/microdata"
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
html := `<div itemscope>
|
||||
<p>My name is <span itemprop="name">Elizabeth</span>.</p>
|
||||
</div>`
|
||||
func main() {
|
||||
html := `<div itemscope>
|
||||
<p>My name is <span itemprop="name">Elizabeth</span>.</p>
|
||||
</div>`
|
||||
|
||||
baseUrl, _ := url.Parse("http://example.com/")
|
||||
p := microdata.NewParser(strings.NewReader(html), baseUrl)
|
||||
baseUrl, _ := url.Parse("http://example.com/")
|
||||
p := microdata.NewParser(strings.NewReader(html), baseUrl)
|
||||
|
||||
data, err := p.Parse()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
data, err := p.Parse()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
println("Name: ", data.Items[0].Properties["name"][0].(string))
|
||||
}
|
||||
println("Name: ", data.Items[0].Properties["name"][0].(string))
|
||||
}
|
||||
```
|
||||
|
||||
Extract microdata from a webpage and print the result as JSON:
|
||||
|
||||
package main
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
import (
|
||||
"bytes"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
|
||||
"github.com/iand/microdata"
|
||||
)
|
||||
"github.com/iand/microdata"
|
||||
)
|
||||
|
||||
func main() {
|
||||
func main() {
|
||||
|
||||
baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
|
||||
baseUrl, _ := url.Parse("http://www.designhive.com/blog/using-schemaorg-microdata")
|
||||
|
||||
resp, _ := http.Get(baseUrl.String())
|
||||
defer resp.Body.Close()
|
||||
resp, _ := http.Get(baseUrl.String())
|
||||
defer resp.Body.Close()
|
||||
|
||||
html, _ := ioutil.ReadAll(resp.Body)
|
||||
html, _ := ioutil.ReadAll(resp.Body)
|
||||
|
||||
p := microdata.NewParser(bytes.NewReader(html), baseUrl)
|
||||
p := microdata.NewParser(bytes.NewReader(html), baseUrl)
|
||||
|
||||
data, _ := p.Parse()
|
||||
|
||||
json, _ := data.JSON()
|
||||
os.Stdout.Write(json)
|
||||
}
|
||||
data, _ := p.Parse()
|
||||
|
||||
json, _ := data.JSON()
|
||||
os.Stdout.Write(json)
|
||||
}
|
||||
```
|
||||
|
||||
## Authors
|
||||
|
||||
* [Ian Davis](http://github.com/iand) - <http://iandavis.com/>
|
||||
|
||||
|
||||
## Contributors
|
||||
|
||||
|
||||
## Contributing
|
||||
|
||||
* Do submit your changes as a pull request
|
||||
|
|
26
microdata.go
26
microdata.go
|
@ -18,12 +18,12 @@ import (
|
|||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
type ValueList []interface{}
|
||||
type PropertyMap map[string]ValueList
|
||||
type valueList []interface{}
|
||||
type propertyMap map[string]valueList
|
||||
|
||||
// Item represents a microdata item
|
||||
type Item struct {
|
||||
Properties PropertyMap `json:"properties"`
|
||||
Properties propertyMap `json:"properties"`
|
||||
Types []string `json:"type,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ type Item struct {
|
|||
// NewItem creates a new microdata item
|
||||
func NewItem() *Item {
|
||||
return &Item{
|
||||
Properties: make(PropertyMap, 0),
|
||||
Properties: make(propertyMap, 0),
|
||||
Types: make([]string, 0),
|
||||
}
|
||||
}
|
||||
|
@ -132,11 +132,10 @@ func (p *Parser) Parse() (*Microdata, error) {
|
|||
}
|
||||
// itemid only valid when itemscope and itemtype are both present
|
||||
if itemid, exists := getAttr("itemid", node); exists {
|
||||
if parsedUrl, err := p.base.Parse(itemid); err == nil {
|
||||
item.ID = parsedUrl.String()
|
||||
if parsedURL, err := p.base.Parse(itemid); err == nil {
|
||||
item.ID = parsedURL.String()
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if itemrefs, exists := getAttr("itemref", node); exists {
|
||||
|
@ -168,7 +167,9 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
|
|||
itemref = strings.TrimSpace(itemref)
|
||||
|
||||
if refnode, exists := p.identifiedNodes[itemref]; exists {
|
||||
p.readItem(subitem, refnode)
|
||||
if refnode != node {
|
||||
p.readItem(subitem, refnode)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -198,15 +199,14 @@ func (p *Parser) readItem(item *Item, node *html.Node) {
|
|||
}
|
||||
case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
|
||||
if urlValue, exists := getAttr("src", node); exists {
|
||||
if parsedUrl, err := p.base.Parse(urlValue); err == nil {
|
||||
propertyValue = parsedUrl.String()
|
||||
if parsedURL, err := p.base.Parse(urlValue); err == nil {
|
||||
propertyValue = parsedURL.String()
|
||||
}
|
||||
|
||||
}
|
||||
case atom.A, atom.Area, atom.Link:
|
||||
if urlValue, exists := getAttr("href", node); exists {
|
||||
if parsedUrl, err := p.base.Parse(urlValue); err == nil {
|
||||
propertyValue = parsedUrl.String()
|
||||
if parsedURL, err := p.base.Parse(urlValue); err == nil {
|
||||
propertyValue = parsedURL.String()
|
||||
}
|
||||
}
|
||||
case atom.Object:
|
||||
|
|
|
@ -8,6 +8,7 @@ package microdata
|
|||
import (
|
||||
"bytes"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
@ -569,3 +570,30 @@ func TestJsonWithType(t *testing.T) {
|
|||
t.Errorf("Expecting %s but got %s", expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
// This test checks that an item whose itemref points at itself does not cause a stack overflow, as reported in
|
||||
// https://github.com/iand/microdata/issues/3
|
||||
func TestSkipSelfReferencingItemref(t *testing.T) {
|
||||
html := `<body itemscope itemtype="http://schema.org/WebPage">
|
||||
<span id="1" itemscope itemtype="http://data-vocabulary.org/Breadcrumb" itemprop="child" itemref="1">
|
||||
<a title="Foo" itemprop="url" href="/foo/bar"><span itemprop="title">Foo</span></a>
|
||||
</span>
|
||||
</body>`
|
||||
|
||||
actual := ParseData(html, t)
|
||||
|
||||
child := NewItem()
|
||||
child.AddString("title", "Foo")
|
||||
child.AddString("url", "http://example.com/foo/bar")
|
||||
|
||||
item := NewItem()
|
||||
item.AddType("http://schema.org/WebPage")
|
||||
item.AddItem("child", child)
|
||||
|
||||
expected := NewMicrodata()
|
||||
expected.AddItem(item)
|
||||
|
||||
if !reflect.DeepEqual(expected, actual) {
|
||||
t.Errorf("Expecting %s but got %s", expected, actual)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue