mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-11-09 02:57:30 -06:00
[bugfix] html escape special characters in text instead of totally removing them (#719)
* remove minify dependency * tidy up some tests * remove pre + postformat funcs * rework sanitization + formatting * update tests * add some more markdown tests
This commit is contained in:
parent
098dbe6ff4
commit
c84384e660
51 changed files with 129 additions and 7419 deletions
98
vendor/github.com/tdewolff/parse/v2/html/README.md
generated
vendored
98
vendor/github.com/tdewolff/parse/v2/html/README.md
generated
vendored
|
|
@ -1,98 +0,0 @@
|
|||
# HTML [API reference](https://pkg.go.dev/github.com/tdewolff/parse/v2/html?tab=doc)
|
||||
|
||||
This package is an HTML5 lexer written in [Go][1]. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF.
|
||||
|
||||
## Installation
|
||||
Run the following command
|
||||
|
||||
go get -u github.com/tdewolff/parse/v2/html
|
||||
|
||||
or add the following import and run the project with `go get`
|
||||
|
||||
import "github.com/tdewolff/parse/v2/html"
|
||||
|
||||
## Lexer
|
||||
### Usage
|
||||
The following initializes a new Lexer with io.Reader `r`:
|
||||
``` go
|
||||
l := html.NewLexer(parse.NewInput(r))
|
||||
```
|
||||
|
||||
To tokenize until EOF or an error, use:
|
||||
``` go
|
||||
for {
|
||||
tt, data := l.Next()
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
// error or EOF set in l.Err()
|
||||
return
|
||||
case html.StartTagToken:
|
||||
// ...
|
||||
for {
|
||||
ttAttr, dataAttr := l.Next()
|
||||
if ttAttr != html.AttributeToken {
|
||||
break
|
||||
}
|
||||
// ...
|
||||
}
|
||||
// ...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
All tokens:
|
||||
``` go
|
||||
ErrorToken TokenType = iota // extra token when errors occur
|
||||
CommentToken
|
||||
DoctypeToken
|
||||
StartTagToken
|
||||
StartTagCloseToken
|
||||
StartTagVoidToken
|
||||
EndTagToken
|
||||
AttributeToken
|
||||
TextToken
|
||||
```
|
||||
|
||||
### Examples
|
||||
``` go
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/tdewolff/parse/v2/html"
|
||||
)
|
||||
|
||||
// Tokenize HTML from stdin.
|
||||
func main() {
|
||||
l := html.NewLexer(parse.NewInput(os.Stdin))
|
||||
for {
|
||||
tt, data := l.Next()
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
if l.Err() != io.EOF {
|
||||
fmt.Println("Error on line", l.Line(), ":", l.Err())
|
||||
}
|
||||
return
|
||||
case html.StartTagToken:
|
||||
fmt.Println("Tag", string(data))
|
||||
for {
|
||||
ttAttr, dataAttr := l.Next()
|
||||
if ttAttr != html.AttributeToken {
|
||||
break
|
||||
}
|
||||
|
||||
key := dataAttr
|
||||
val := l.AttrVal()
|
||||
fmt.Println("Attribute", string(key), "=", string(val))
|
||||
}
|
||||
// ...
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
|
||||
|
||||
[1]: http://golang.org/ "Go Language"
|
||||
Loading…
Add table
Add a link
Reference in a new issue