forked from ukamnya/microdata_mirror
microdata: general cleanup and add travis config
parent
2433e40d1e
commit
8306de17cc
|
@ -0,0 +1,9 @@
|
||||||
|
language: go
|
||||||
|
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- go: 1.6.3
|
||||||
|
- go: 1.7.5
|
||||||
|
- go: 1.8.2
|
||||||
|
- go: 1.9
|
||||||
|
- go: master
|
|
@ -3,6 +3,8 @@ A microdata parser in Go
|
||||||
|
|
||||||
See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata
|
See [http://www.w3.org/TR/microdata/](http://www.w3.org/TR/microdata/) for more information about Microdata
|
||||||
|
|
||||||
|
[![Build Status](https://travis-ci.org/iand/microdata.svg?branch=master)](https://travis-ci.org/iand/microdata)
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
Simply run
|
Simply run
|
||||||
|
@ -86,7 +88,7 @@ Extract microdata from a webpage and print the result as JSON
|
||||||
* Do your best to adhere to the existing coding conventions and idioms.
|
* Do your best to adhere to the existing coding conventions and idioms.
|
||||||
* Do run `go fmt` on the code before committing
|
* Do run `go fmt` on the code before committing
|
||||||
* Do feel free to add yourself to the [`CREDITS`](CREDITS) file and the
|
* Do feel free to add yourself to the [`CREDITS`](CREDITS) file and the
|
||||||
corresponding Contributors list in the the [`README.md`](README.md).
|
corresponding Contributors list in the [`README.md`](README.md).
|
||||||
Alphabetical order applies.
|
Alphabetical order applies.
|
||||||
* Don't touch the [`AUTHORS`](AUTHORS) file. An existing author will add you if
|
* Don't touch the [`AUTHORS`](AUTHORS) file. An existing author will add you if
|
||||||
your contributions are significant enough.
|
your contributions are significant enough.
|
||||||
|
|
26
microdata.go
26
microdata.go
|
@ -3,7 +3,7 @@
|
||||||
information, see <http://unlicense.org/> or the accompanying UNLICENSE file.
|
information, see <http://unlicense.org/> or the accompanying UNLICENSE file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// A package for parsing microdata
|
// Package microdata provides types and functions for parsing microdata from web pages.
|
||||||
// See http://www.w3.org/TR/microdata/ for more information about Microdata
|
// See http://www.w3.org/TR/microdata/ for more information about Microdata
|
||||||
package microdata
|
package microdata
|
||||||
|
|
||||||
|
@ -21,14 +21,14 @@ import (
|
||||||
type ValueList []interface{}
|
type ValueList []interface{}
|
||||||
type PropertyMap map[string]ValueList
|
type PropertyMap map[string]ValueList
|
||||||
|
|
||||||
// Represents a microdata item
|
// Item represents a microdata item
|
||||||
type Item struct {
|
type Item struct {
|
||||||
Properties PropertyMap `json:"properties"`
|
Properties PropertyMap `json:"properties"`
|
||||||
Types []string `json:"type,omitempty"`
|
Types []string `json:"type,omitempty"`
|
||||||
ID string `json:"id,omitempty"`
|
ID string `json:"id,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a new microdata item
|
// NewItem creates a new microdata item
|
||||||
func NewItem() *Item {
|
func NewItem() *Item {
|
||||||
return &Item{
|
return &Item{
|
||||||
Properties: make(PropertyMap, 0),
|
Properties: make(PropertyMap, 0),
|
||||||
|
@ -36,40 +36,40 @@ func NewItem() *Item {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a string type item property value
|
// AddString adds a string type item property value
|
||||||
func (i *Item) AddString(property string, value string) {
|
func (i *Item) AddString(property string, value string) {
|
||||||
i.Properties[property] = append(i.Properties[property], value)
|
i.Properties[property] = append(i.Properties[property], value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add an Item type item property value
|
// AddItem adds an Item type item property value
|
||||||
func (i *Item) AddItem(property string, value *Item) {
|
func (i *Item) AddItem(property string, value *Item) {
|
||||||
i.Properties[property] = append(i.Properties[property], value)
|
i.Properties[property] = append(i.Properties[property], value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a type to the item
|
// AddType adds a type to the item
|
||||||
func (i *Item) AddType(value string) {
|
func (i *Item) AddType(value string) {
|
||||||
i.Types = append(i.Types, value)
|
i.Types = append(i.Types, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Represents a set of microdata items
|
// Microdata represents a set of microdata items
|
||||||
type Microdata struct {
|
type Microdata struct {
|
||||||
Items []*Item `json:"items"`
|
Items []*Item `json:"items"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a new microdata set
|
// NewMicrodata creates a new microdata set
|
||||||
func NewMicrodata() *Microdata {
|
func NewMicrodata() *Microdata {
|
||||||
return &Microdata{
|
return &Microdata{
|
||||||
Items: make([]*Item, 0),
|
Items: make([]*Item, 0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add an item to the microdata set
|
// AddItem adds an item to the microdata set
|
||||||
func (m *Microdata) AddItem(value *Item) {
|
func (m *Microdata) AddItem(value *Item) {
|
||||||
m.Items = append(m.Items, value)
|
m.Items = append(m.Items, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert the microdata set to JSON
|
// JSON converts the microdata set to JSON
|
||||||
func (m *Microdata) Json() ([]byte, error) {
|
func (m *Microdata) JSON() ([]byte, error) {
|
||||||
b, err := json.Marshal(m)
|
b, err := json.Marshal(m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -77,7 +77,7 @@ func (m *Microdata) Json() ([]byte, error) {
|
||||||
return b, nil
|
return b, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// An HTML parser that extracts microdata
|
// Parser is an HTML parser that extracts microdata
|
||||||
type Parser struct {
|
type Parser struct {
|
||||||
r io.Reader
|
r io.Reader
|
||||||
data *Microdata
|
data *Microdata
|
||||||
|
@ -85,7 +85,7 @@ type Parser struct {
|
||||||
identifiedNodes map[string]*html.Node
|
identifiedNodes map[string]*html.Node
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a new parser for extracting microdata
|
// NewParser creates a new parser for extracting microdata
|
||||||
// r is a reader over an HTML document
|
// r is a reader over an HTML document
|
||||||
// base is the base URL for resolving relative URLs
|
// base is the base URL for resolving relative URLs
|
||||||
func NewParser(r io.Reader, base *url.URL) *Parser {
|
func NewParser(r io.Reader, base *url.URL) *Parser {
|
||||||
|
|
|
@ -324,7 +324,7 @@ func TestParseItemType(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if item.Types[0] != "http://example.org/animals#cat" {
|
if item.Types[0] != "http://example.org/animals#cat" {
|
||||||
t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d", item.Types[0])
|
t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %s", item.Types[0])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -340,10 +340,10 @@ func TestParseMultipleItemTypes(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if item.Types[0] != "http://example.org/animals#mammal" {
|
if item.Types[0] != "http://example.org/animals#mammal" {
|
||||||
t.Errorf("Expecting type of 'http://example.org/animals#mammal' but got %d", item.Types[0])
|
t.Errorf("Expecting type of 'http://example.org/animals#mammal' but got %s", item.Types[0])
|
||||||
}
|
}
|
||||||
if item.Types[1] != "http://example.org/animals#cat" {
|
if item.Types[1] != "http://example.org/animals#cat" {
|
||||||
t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %d", item.Types[1])
|
t.Errorf("Expecting type of 'http://example.org/animals#cat' but got %s", item.Types[1])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -362,7 +362,7 @@ func TestParseItemId(t *testing.T) {
|
||||||
item := ParseOneItem(html, t)
|
item := ParseOneItem(html, t)
|
||||||
|
|
||||||
if item.ID != "urn:isbn:0-330-34032-8" {
|
if item.ID != "urn:isbn:0-330-34032-8" {
|
||||||
t.Errorf("Expecting id of 'urn:isbn:0-330-34032-8' but got %d", item.ID)
|
t.Errorf("Expecting id of 'urn:isbn:0-330-34032-8' but got %s", item.ID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -533,11 +533,11 @@ func TestParseItemRelativeId(t *testing.T) {
|
||||||
item := ParseOneItem(html, t)
|
item := ParseOneItem(html, t)
|
||||||
|
|
||||||
if item.ID != "http://example.com/foo" {
|
if item.ID != "http://example.com/foo" {
|
||||||
t.Errorf("Expecting id of 'http://example.com/foo' but got %d", item.ID)
|
t.Errorf("Expecting id of 'http://example.com/foo' but got %s", item.ID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestJson(t *testing.T) {
|
func TestJSON(t *testing.T) {
|
||||||
item := NewItem()
|
item := NewItem()
|
||||||
item.AddString("name", "Elizabeth")
|
item.AddString("name", "Elizabeth")
|
||||||
|
|
||||||
|
@ -546,7 +546,7 @@ func TestJson(t *testing.T) {
|
||||||
|
|
||||||
expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]}}]}`)
|
expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]}}]}`)
|
||||||
|
|
||||||
actual, _ := data.Json()
|
actual, _ := data.JSON()
|
||||||
|
|
||||||
if !bytes.Equal(actual, expected) {
|
if !bytes.Equal(actual, expected) {
|
||||||
t.Errorf("Expecting %s but got %s", expected, actual)
|
t.Errorf("Expecting %s but got %s", expected, actual)
|
||||||
|
@ -563,7 +563,7 @@ func TestJsonWithType(t *testing.T) {
|
||||||
|
|
||||||
expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]},"type":["http://example.org/animals#cat"]}]}`)
|
expected := []byte(`{"items":[{"properties":{"name":["Elizabeth"]},"type":["http://example.org/animals#cat"]}]}`)
|
||||||
|
|
||||||
actual, _ := data.Json()
|
actual, _ := data.JSON()
|
||||||
|
|
||||||
if !bytes.Equal(actual, expected) {
|
if !bytes.Equal(actual, expected) {
|
||||||
t.Errorf("Expecting %s but got %s", expected, actual)
|
t.Errorf("Expecting %s but got %s", expected, actual)
|
||||||
|
|
Loading…
Reference in New Issue