Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

2 changed files with 13 additions and 29 deletions

2
go.mod
View File

@@ -1,4 +1,4 @@
module git.ukamnya.ru/ukamnya/microdata module github.com/iand/microdata
go 1.21.4 go 1.21.4

View File

@@ -31,7 +31,7 @@ type Item struct {
// NewItem creates a new microdata item // NewItem creates a new microdata item
func NewItem() *Item { func NewItem() *Item {
return &Item{ return &Item{
Properties: make(propertyMap), Properties: make(propertyMap, 0),
Types: make([]string, 0), Types: make([]string, 0),
} }
} }
@@ -104,7 +104,7 @@ func (p *Parser) Parse() (*Microdata, error) {
} }
topLevelItemNodes := make([]*html.Node, 0) topLevelItemNodes := make([]*html.Node, 0)
p.identifiedNodes = make(map[string]*html.Node) p.identifiedNodes = make(map[string]*html.Node, 0)
walk(tree, func(n *html.Node) { walk(tree, func(n *html.Node) {
if n.Type == html.ElementNode { if n.Type == html.ElementNode {
@@ -171,47 +171,39 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item {
} }
} }
} else { } else {
var propertyValue *string var propertyValue string
switch node.DataAtom { switch node.DataAtom {
case atom.Meta: case atom.Meta:
if val, exists := getAttr("content", node); exists { if val, exists := getAttr("content", node); exists {
propertyValue = &val propertyValue = val
} }
case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video: case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
if urlValue, exists := getAttr("src", node); exists { if urlValue, exists := getAttr("src", node); exists {
if parsedURL, err := p.base.Parse(urlValue); err == nil { if parsedURL, err := p.base.Parse(urlValue); err == nil {
parsedStr := parsedURL.String() propertyValue = parsedURL.String()
propertyValue = &parsedStr
} }
} }
case atom.A, atom.Area, atom.Link: case atom.A, atom.Area, atom.Link:
if urlValue, exists := getAttr("href", node); exists { if urlValue, exists := getAttr("href", node); exists {
if parsedURL, err := p.base.Parse(urlValue); err == nil { if parsedURL, err := p.base.Parse(urlValue); err == nil {
parsedStr := parsedURL.String() propertyValue = parsedURL.String()
propertyValue = &parsedStr
} }
} }
case atom.Object: case atom.Object:
if urlValue, exists := getAttr("data", node); exists { if urlValue, exists := getAttr("data", node); exists {
propertyValue = &urlValue propertyValue = urlValue
} }
case atom.Data, atom.Meter: case atom.Data, atom.Meter:
if urlValue, exists := getAttr("value", node); exists { if urlValue, exists := getAttr("value", node); exists {
propertyValue = &urlValue propertyValue = urlValue
} }
case atom.Time: case atom.Time:
if urlValue, exists := getAttr("datetime", node); exists { if urlValue, exists := getAttr("datetime", node); exists {
propertyValue = &urlValue propertyValue = urlValue
} }
default: default:
// The "content" attribute can be found on other tags besides the meta tag.
if val, ok := getAttr("content", node); ok {
propertyValue = &val
break
}
var text bytes.Buffer var text bytes.Buffer
walk(node, func(n *html.Node) { walk(node, func(n *html.Node) {
if n.Type == html.TextNode { if n.Type == html.TextNode {
@@ -219,22 +211,14 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item {
} }
}) })
propertyValue = text.String()
val := text.String()
propertyValue = &val
} }
if propertyValue == nil { if len(propertyValue) > 0 {
if val, ok := getAttr("content", node); ok {
propertyValue = &val
}
}
if propertyValue != nil {
for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") { for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
propertyName = strings.TrimSpace(propertyName) propertyName = strings.TrimSpace(propertyName)
if propertyName != "" { if propertyName != "" {
item.AddString(propertyName, *propertyValue) item.AddString(propertyName, propertyValue)
} }
} }
} }