// Mirror of https://github.com/superseriousbusiness/gotosocial.git (synced 2025-11-04 04:42:25 -06:00).
// 139 lines, 3 KiB, Go.

package html
 | 
						|
 | 
						|
import (
 | 
						|
	"github.com/tdewolff/parse/v2"
 | 
						|
	"github.com/tdewolff/parse/v2/html"
 | 
						|
)
 | 
						|
 | 
						|
// Token is a single token unit with an attribute value (if given) and hash of the data.
 | 
						|
type Token struct {
 | 
						|
	html.TokenType
 | 
						|
	Hash        Hash
 | 
						|
	Data        []byte
 | 
						|
	Text        []byte
 | 
						|
	AttrVal     []byte
 | 
						|
	Traits      traits
 | 
						|
	Offset      int
 | 
						|
	HasTemplate bool
 | 
						|
}
 | 
						|
 | 
						|
// TokenBuffer is a buffer that allows for token look-ahead.
 | 
						|
type TokenBuffer struct {
 | 
						|
	r *parse.Input
 | 
						|
	l *html.Lexer
 | 
						|
 | 
						|
	buf []Token
 | 
						|
	pos int
 | 
						|
 | 
						|
	attrBuffer []*Token
 | 
						|
}
 | 
						|
 | 
						|
// NewTokenBuffer returns a new TokenBuffer.
 | 
						|
func NewTokenBuffer(r *parse.Input, l *html.Lexer) *TokenBuffer {
 | 
						|
	return &TokenBuffer{
 | 
						|
		r:   r,
 | 
						|
		l:   l,
 | 
						|
		buf: make([]Token, 0, 8),
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (z *TokenBuffer) read(t *Token) {
 | 
						|
	t.Offset = z.r.Offset()
 | 
						|
	t.TokenType, t.Data = z.l.Next()
 | 
						|
	t.Text = z.l.Text()
 | 
						|
	t.HasTemplate = z.l.HasTemplate()
 | 
						|
	if t.TokenType == html.AttributeToken {
 | 
						|
		t.Offset += 1 + len(t.Text) + 1
 | 
						|
		t.AttrVal = z.l.AttrVal()
 | 
						|
		if 1 < len(t.AttrVal) && (t.AttrVal[0] == '"' || t.AttrVal[0] == '\'') {
 | 
						|
			t.Offset++
 | 
						|
			t.AttrVal = t.AttrVal[1 : len(t.AttrVal)-1] // quotes will be readded in attribute loop if necessary
 | 
						|
		}
 | 
						|
		t.Hash = ToHash(t.Text)
 | 
						|
		t.Traits = attrMap[t.Hash]
 | 
						|
	} else if t.TokenType == html.StartTagToken || t.TokenType == html.EndTagToken {
 | 
						|
		t.AttrVal = nil
 | 
						|
		t.Hash = ToHash(t.Text)
 | 
						|
		t.Traits = tagMap[t.Hash] // zero if not exist
 | 
						|
	} else {
 | 
						|
		t.AttrVal = nil
 | 
						|
		t.Hash = 0
 | 
						|
		t.Traits = 0
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Peek returns the ith element and possibly does an allocation.
 | 
						|
// Peeking past an error will panic.
 | 
						|
func (z *TokenBuffer) Peek(pos int) *Token {
 | 
						|
	pos += z.pos
 | 
						|
	if pos >= len(z.buf) {
 | 
						|
		if len(z.buf) > 0 && z.buf[len(z.buf)-1].TokenType == html.ErrorToken {
 | 
						|
			return &z.buf[len(z.buf)-1]
 | 
						|
		}
 | 
						|
 | 
						|
		c := cap(z.buf)
 | 
						|
		d := len(z.buf) - z.pos
 | 
						|
		p := pos - z.pos + 1 // required peek length
 | 
						|
		var buf []Token
 | 
						|
		if 2*p > c {
 | 
						|
			buf = make([]Token, 0, 2*c+p)
 | 
						|
		} else {
 | 
						|
			buf = z.buf
 | 
						|
		}
 | 
						|
		copy(buf[:d], z.buf[z.pos:])
 | 
						|
 | 
						|
		buf = buf[:p]
 | 
						|
		pos -= z.pos
 | 
						|
		for i := d; i < p; i++ {
 | 
						|
			z.read(&buf[i])
 | 
						|
			if buf[i].TokenType == html.ErrorToken {
 | 
						|
				buf = buf[:i+1]
 | 
						|
				pos = i
 | 
						|
				break
 | 
						|
			}
 | 
						|
		}
 | 
						|
		z.pos, z.buf = 0, buf
 | 
						|
	}
 | 
						|
	return &z.buf[pos]
 | 
						|
}
 | 
						|
 | 
						|
// Shift returns the first element and advances position.
 | 
						|
func (z *TokenBuffer) Shift() *Token {
 | 
						|
	if z.pos >= len(z.buf) {
 | 
						|
		t := &z.buf[:1][0]
 | 
						|
		z.read(t)
 | 
						|
		return t
 | 
						|
	}
 | 
						|
	t := &z.buf[z.pos]
 | 
						|
	z.pos++
 | 
						|
	return t
 | 
						|
}
 | 
						|
 | 
						|
// Attributes extracts the gives attribute hashes from a tag.
 | 
						|
// It returns in the same order pointers to the requested token data or nil.
 | 
						|
func (z *TokenBuffer) Attributes(hashes ...Hash) []*Token {
 | 
						|
	n := 0
 | 
						|
	for {
 | 
						|
		if t := z.Peek(n); t.TokenType != html.AttributeToken {
 | 
						|
			break
 | 
						|
		}
 | 
						|
		n++
 | 
						|
	}
 | 
						|
	if len(hashes) > cap(z.attrBuffer) {
 | 
						|
		z.attrBuffer = make([]*Token, len(hashes))
 | 
						|
	} else {
 | 
						|
		z.attrBuffer = z.attrBuffer[:len(hashes)]
 | 
						|
		for i := range z.attrBuffer {
 | 
						|
			z.attrBuffer[i] = nil
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for i := z.pos; i < z.pos+n; i++ {
 | 
						|
		attr := &z.buf[i]
 | 
						|
		for j, hash := range hashes {
 | 
						|
			if hash == attr.Hash {
 | 
						|
				z.attrBuffer[j] = attr
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return z.attrBuffer
 | 
						|
}
 |