forked from ukamnya/microdata_mirror
		
	дополнительные варианты поиска контента
This commit is contained in:
		
							parent
							
								
									546a05f94b
								
							
						
					
					
						commit
						dc3bbfce4d
					
				
							
								
								
									
										40
									
								
								microdata.go
									
									
									
									
									
								
							
							
						
						
									
										40
									
								
								microdata.go
									
									
									
									
									
								
							@ -31,7 +31,7 @@ type Item struct {
 | 
				
			|||||||
// NewItem creates a new microdata item
 | 
					// NewItem creates a new microdata item
 | 
				
			||||||
func NewItem() *Item {
 | 
					func NewItem() *Item {
 | 
				
			||||||
	return &Item{
 | 
						return &Item{
 | 
				
			||||||
		Properties: make(propertyMap, 0),
 | 
							Properties: make(propertyMap),
 | 
				
			||||||
		Types:      make([]string, 0),
 | 
							Types:      make([]string, 0),
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -104,7 +104,7 @@ func (p *Parser) Parse() (*Microdata, error) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	topLevelItemNodes := make([]*html.Node, 0)
 | 
						topLevelItemNodes := make([]*html.Node, 0)
 | 
				
			||||||
	p.identifiedNodes = make(map[string]*html.Node, 0)
 | 
						p.identifiedNodes = make(map[string]*html.Node)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	walk(tree, func(n *html.Node) {
 | 
						walk(tree, func(n *html.Node) {
 | 
				
			||||||
		if n.Type == html.ElementNode {
 | 
							if n.Type == html.ElementNode {
 | 
				
			||||||
@ -171,39 +171,47 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item {
 | 
				
			|||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			var propertyValue string
 | 
								var propertyValue *string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			switch node.DataAtom {
 | 
								switch node.DataAtom {
 | 
				
			||||||
			case atom.Meta:
 | 
								case atom.Meta:
 | 
				
			||||||
				if val, exists := getAttr("content", node); exists {
 | 
									if val, exists := getAttr("content", node); exists {
 | 
				
			||||||
					propertyValue = val
 | 
										propertyValue = &val
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
 | 
								case atom.Audio, atom.Embed, atom.Iframe, atom.Img, atom.Source, atom.Track, atom.Video:
 | 
				
			||||||
				if urlValue, exists := getAttr("src", node); exists {
 | 
									if urlValue, exists := getAttr("src", node); exists {
 | 
				
			||||||
					if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
										if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
				
			||||||
						propertyValue = parsedURL.String()
 | 
											parsedStr := parsedURL.String()
 | 
				
			||||||
 | 
											propertyValue = &parsedStr
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			case atom.A, atom.Area, atom.Link:
 | 
								case atom.A, atom.Area, atom.Link:
 | 
				
			||||||
				if urlValue, exists := getAttr("href", node); exists {
 | 
									if urlValue, exists := getAttr("href", node); exists {
 | 
				
			||||||
					if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
										if parsedURL, err := p.base.Parse(urlValue); err == nil {
 | 
				
			||||||
						propertyValue = parsedURL.String()
 | 
											parsedStr := parsedURL.String()
 | 
				
			||||||
 | 
											propertyValue = &parsedStr
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			case atom.Object:
 | 
								case atom.Object:
 | 
				
			||||||
				if urlValue, exists := getAttr("data", node); exists {
 | 
									if urlValue, exists := getAttr("data", node); exists {
 | 
				
			||||||
					propertyValue = urlValue
 | 
										propertyValue = &urlValue
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			case atom.Data, atom.Meter:
 | 
								case atom.Data, atom.Meter:
 | 
				
			||||||
				if urlValue, exists := getAttr("value", node); exists {
 | 
									if urlValue, exists := getAttr("value", node); exists {
 | 
				
			||||||
					propertyValue = urlValue
 | 
										propertyValue = &urlValue
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			case atom.Time:
 | 
								case atom.Time:
 | 
				
			||||||
				if urlValue, exists := getAttr("datetime", node); exists {
 | 
									if urlValue, exists := getAttr("datetime", node); exists {
 | 
				
			||||||
					propertyValue = urlValue
 | 
										propertyValue = &urlValue
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			default:
 | 
								default:
 | 
				
			||||||
 | 
									// The "content" attribute can be found on other tags besides the meta tag.
 | 
				
			||||||
 | 
									if val, ok := getAttr("content", node); ok {
 | 
				
			||||||
 | 
										propertyValue = &val
 | 
				
			||||||
 | 
										break
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				var text bytes.Buffer
 | 
									var text bytes.Buffer
 | 
				
			||||||
				walk(node, func(n *html.Node) {
 | 
									walk(node, func(n *html.Node) {
 | 
				
			||||||
					if n.Type == html.TextNode {
 | 
										if n.Type == html.TextNode {
 | 
				
			||||||
@ -211,14 +219,22 @@ func (p *Parser) readItem(item *Item, node *html.Node) *Item {
 | 
				
			|||||||
					}
 | 
										}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				})
 | 
									})
 | 
				
			||||||
				propertyValue = text.String()
 | 
					
 | 
				
			||||||
 | 
									val := text.String()
 | 
				
			||||||
 | 
									propertyValue = &val
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if len(propertyValue) > 0 {
 | 
								if propertyValue == nil {
 | 
				
			||||||
 | 
									if val, ok := getAttr("content", node); ok {
 | 
				
			||||||
 | 
										propertyValue = &val
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if propertyValue != nil {
 | 
				
			||||||
				for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
 | 
									for _, propertyName := range strings.Split(strings.TrimSpace(itemprop), " ") {
 | 
				
			||||||
					propertyName = strings.TrimSpace(propertyName)
 | 
										propertyName = strings.TrimSpace(propertyName)
 | 
				
			||||||
					if propertyName != "" {
 | 
										if propertyName != "" {
 | 
				
			||||||
						item.AddString(propertyName, propertyValue)
 | 
											item.AddString(propertyName, *propertyValue)
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user