mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-11-25 04:13:32 -06:00
[chore] update dependencies, bump to Go 1.19.1 (#826)
* update dependencies, bump Go version to 1.19 * bump test image Go version * update golangci-lint * update gotosocial-drone-build * sign * linting, go fmt * update swagger docs * update swagger docs * whitespace * update contributing.md * fuckin whoopsie doopsie * linterino, linteroni * fix followrequest test not starting processor * fix other api/client tests not starting processor * fix remaining tests where processor not started * bump go-runners version * don't check last-webfingered-at, processor may have updated this * update swagger command * update bun to latest version * fix embed to work the same as before with new bun Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
This commit is contained in:
parent
00d38855d4
commit
a156188b3e
1135 changed files with 258905 additions and 137146 deletions
2
vendor/mvdan.cc/xurls/v2/.gitattributes
vendored
Normal file
2
vendor/mvdan.cc/xurls/v2/.gitattributes
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
# To prevent CRLF breakages on Windows for fragile files, like testdata.
|
||||
* -text
|
||||
8
vendor/mvdan.cc/xurls/v2/README.md
vendored
8
vendor/mvdan.cc/xurls/v2/README.md
vendored
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
[Go Reference](https://pkg.go.dev/mvdan.cc/xurls/v2)
|
||||
|
||||
Extract urls from text using regular expressions. Requires Go 1.15 or later.
|
||||
Extract urls from text using regular expressions. Requires Go 1.16 or later.
|
||||
|
||||
```go
|
||||
import "mvdan.cc/xurls/v2"
|
||||
|
|
@ -22,14 +22,14 @@ Since API is centered around [regexp.Regexp](https://golang.org/pkg/regexp/#Rege
|
|||
many other methods are available, such as finding the [byte indexes](https://golang.org/pkg/regexp/#Regexp.FindAllIndex)
|
||||
for all matches.
|
||||
|
||||
Note that calling the exposed functions means compiling a regular expression, so
|
||||
repeated calls should be avoided.
|
||||
The regular expressions are compiled when the API is first called.
|
||||
Any subsequent calls will use the same regular expression pointers.
|
||||
|
||||
#### cmd/xurls
|
||||
|
||||
To install the tool globally:
|
||||
|
||||
cd $(mktemp -d); go mod init tmp; GO111MODULE=on go get mvdan.cc/xurls/v2/cmd/xurls
|
||||
go install mvdan.cc/xurls/v2/cmd/xurls@latest
|
||||
|
||||
```shell
|
||||
$ echo "Do gophers live in http://golang.org?" | xurls
|
||||
|
|
|
|||
15
vendor/mvdan.cc/xurls/v2/schemes.go
vendored
15
vendor/mvdan.cc/xurls/v2/schemes.go
vendored
|
|
@ -23,6 +23,7 @@ var Schemes = []string{
|
|||
`android`,
|
||||
`appdata`,
|
||||
`apt`,
|
||||
`ar`,
|
||||
`ark`,
|
||||
`attachment`,
|
||||
`aw`,
|
||||
|
|
@ -51,7 +52,6 @@ var Schemes = []string{
|
|||
`com-eventbrite-attendee`,
|
||||
`content`,
|
||||
`content-type`,
|
||||
`conti`,
|
||||
`crid`,
|
||||
`cvs`,
|
||||
`dab`,
|
||||
|
|
@ -85,6 +85,7 @@ var Schemes = []string{
|
|||
`fax`,
|
||||
`feed`,
|
||||
`feedready`,
|
||||
`fido`,
|
||||
`file`,
|
||||
`filesystem`,
|
||||
`finger`,
|
||||
|
|
@ -174,6 +175,7 @@ var Schemes = []string{
|
|||
`ms-inputapp`,
|
||||
`ms-lockscreencomponent-config`,
|
||||
`ms-media-stream-id`,
|
||||
`ms-meetnow`,
|
||||
`ms-mixedrealitycapture`,
|
||||
`ms-mobileplans`,
|
||||
`ms-officeapp`,
|
||||
|
|
@ -209,6 +211,7 @@ var Schemes = []string{
|
|||
`ms-settings-wifi`,
|
||||
`ms-settings-workplace`,
|
||||
`ms-spd`,
|
||||
`ms-stickers`,
|
||||
`ms-sttoverlay`,
|
||||
`ms-transit-to`,
|
||||
`ms-useractivityset`,
|
||||
|
|
@ -278,13 +281,14 @@ var Schemes = []string{
|
|||
`sftp`,
|
||||
`sgn`,
|
||||
`shc`,
|
||||
`shttp`,
|
||||
`sieve`,
|
||||
`simpleledger`,
|
||||
`simplex`,
|
||||
`sip`,
|
||||
`sips`,
|
||||
`skype`,
|
||||
`smb`,
|
||||
`smp`,
|
||||
`sms`,
|
||||
`smtp`,
|
||||
`snews`,
|
||||
|
|
@ -300,8 +304,10 @@ var Schemes = []string{
|
|||
`stun`,
|
||||
`stuns`,
|
||||
`submit`,
|
||||
`swh`,
|
||||
`svn`,
|
||||
`swh`,
|
||||
`swid`,
|
||||
`swidpath`,
|
||||
`tag`,
|
||||
`teamspeak`,
|
||||
`tel`,
|
||||
|
|
@ -318,12 +324,13 @@ var Schemes = []string{
|
|||
`tv`,
|
||||
`udp`,
|
||||
`unreal`,
|
||||
`upt`,
|
||||
`urn`,
|
||||
`ut2004`,
|
||||
`uuid-in-package`,
|
||||
`v-event`,
|
||||
`vemmi`,
|
||||
`ventrilo`,
|
||||
`ves`,
|
||||
`videotex`,
|
||||
`vnc`,
|
||||
`view-source`,
|
||||
|
|
|
|||
14
vendor/mvdan.cc/xurls/v2/tlds.go
vendored
14
vendor/mvdan.cc/xurls/v2/tlds.go
vendored
|
|
@ -34,7 +34,6 @@ var TLDs = []string{
|
|||
`aero`,
|
||||
`aetna`,
|
||||
`af`,
|
||||
`afamilycompany`,
|
||||
`afl`,
|
||||
`africa`,
|
||||
`ag`,
|
||||
|
|
@ -186,7 +185,6 @@ var TLDs = []string{
|
|||
`brussels`,
|
||||
`bs`,
|
||||
`bt`,
|
||||
`budapest`,
|
||||
`bugatti`,
|
||||
`build`,
|
||||
`builders`,
|
||||
|
|
@ -310,7 +308,6 @@ var TLDs = []string{
|
|||
`crs`,
|
||||
`cruise`,
|
||||
`cruises`,
|
||||
`csc`,
|
||||
`cu`,
|
||||
`cuisinella`,
|
||||
`cv`,
|
||||
|
|
@ -369,7 +366,6 @@ var TLDs = []string{
|
|||
`drive`,
|
||||
`dtv`,
|
||||
`dubai`,
|
||||
`duck`,
|
||||
`dunlop`,
|
||||
`dupont`,
|
||||
`durban`,
|
||||
|
|
@ -503,7 +499,6 @@ var TLDs = []string{
|
|||
`gives`,
|
||||
`giving`,
|
||||
`gl`,
|
||||
`glade`,
|
||||
`glass`,
|
||||
`gle`,
|
||||
`global`,
|
||||
|
|
@ -667,6 +662,7 @@ var TLDs = []string{
|
|||
`kh`,
|
||||
`ki`,
|
||||
`kia`,
|
||||
`kids`,
|
||||
`kim`,
|
||||
`kinder`,
|
||||
`kindle`,
|
||||
|
|
@ -729,7 +725,6 @@ var TLDs = []string{
|
|||
`lipsy`,
|
||||
`live`,
|
||||
`living`,
|
||||
`lixil`,
|
||||
`lk`,
|
||||
`llc`,
|
||||
`llp`,
|
||||
|
|
@ -887,7 +882,6 @@ var TLDs = []string{
|
|||
`nz`,
|
||||
`obi`,
|
||||
`observer`,
|
||||
`off`,
|
||||
`office`,
|
||||
`okinawa`,
|
||||
`olayan`,
|
||||
|
|
@ -987,10 +981,8 @@ var TLDs = []string{
|
|||
`qpon`,
|
||||
`quebec`,
|
||||
`quest`,
|
||||
`qvc`,
|
||||
`racing`,
|
||||
`radio`,
|
||||
`raid`,
|
||||
`re`,
|
||||
`read`,
|
||||
`realestate`,
|
||||
|
|
@ -1022,7 +1014,6 @@ var TLDs = []string{
|
|||
`ril`,
|
||||
`rio`,
|
||||
`rip`,
|
||||
`rmit`,
|
||||
`ro`,
|
||||
`rocher`,
|
||||
`rocks`,
|
||||
|
|
@ -1068,7 +1059,6 @@ var TLDs = []string{
|
|||
`schule`,
|
||||
`schwarz`,
|
||||
`science`,
|
||||
`scjohnson`,
|
||||
`scot`,
|
||||
`sd`,
|
||||
`se`,
|
||||
|
|
@ -1161,7 +1151,6 @@ var TLDs = []string{
|
|||
`suzuki`,
|
||||
`sv`,
|
||||
`swatch`,
|
||||
`swiftcover`,
|
||||
`swiss`,
|
||||
`sx`,
|
||||
`sy`,
|
||||
|
|
@ -1470,7 +1459,6 @@ var TLDs = []string{
|
|||
`嘉里`,
|
||||
`嘉里大酒店`,
|
||||
`在线`,
|
||||
`大众汽车`,
|
||||
`大拿`,
|
||||
`天主教`,
|
||||
`娱乐`,
|
||||
|
|
|
|||
4
vendor/mvdan.cc/xurls/v2/unicode.go
vendored
4
vendor/mvdan.cc/xurls/v2/unicode.go
vendored
|
|
@ -2,4 +2,6 @@
|
|||
|
||||
package xurls
|
||||
|
||||
const otherPuncMinusDoubleQuote = "!#%&'\\*,\\./:;\\?@\\\\¡§¶·¿;·՚՛՜՝՞՟։׀׃׆׳״؉؊،؍؛؞؟٪٫٬٭۔܀܁܂܃܄܅܆܇܈܉܊܋܌܍߷߸߹࠰࠱࠲࠳࠴࠵࠶࠷࠸࠹࠺࠻࠼࠽࠾࡞।॥॰৽੶૰౷಄෴๏๚๛༄༅༆༇༈༉༊་༌།༎༏༐༑༒༔྅࿐࿑࿒࿓࿔࿙࿚၊။၌၍၎၏჻፠፡።፣፤፥፦፧፨᙮᛫᛬᛭᜵᜶។៕៖៘៙៚᠀᠁᠂᠃᠄᠅᠇᠈᠉᠊᥄᥅᨞᨟᪠᪡᪢᪣᪤᪥᪦᪨᪩᪪᪫᪬᪭᭚᭛᭜᭝᭞᭟᭠᯼᯽᯾᯿᰻᰼᰽᰾᰿᱾᱿᳀᳁᳂᳃᳄᳅᳆᳇᳓‖‗†‡•‣․‥…‧‰‱′″‴‵‶‷‸※‼‽‾⁁⁂⁃⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁓⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞⳹⳺⳻⳼⳾⳿⵰⸀⸁⸆⸇⸈⸋⸎⸏⸐⸑⸒⸓⸔⸕⸖⸘⸙⸛⸞⸟⸪⸫⸬⸭⸮⸰⸱⸲⸳⸴⸵⸶⸷⸸⸹⸼⸽⸾⸿⹁⹃⹄⹅⹆⹇⹈⹉⹊⹋⹌⹍⹎⹏⹒、。〃〽・꓾꓿꘍꘎꘏꙳꙾꛲꛳꛴꛵꛶꛷꡴꡵꡶꡷꣎꣏꣸꣹꣺꣼꤮꤯꥟꧁꧂꧃꧄꧅꧆꧇꧈꧉꧊꧋꧌꧍꧞꧟꩜꩝꩞꩟꫞꫟꫰꫱꯫︐︑︒︓︔︕︖︙︰﹅﹆﹉﹊﹋﹌﹐﹑﹒﹔﹕﹖﹗﹟﹠﹡﹨﹪﹫!"#%&'*,./:;?@\。、・𐄀𐄁𐄂𐎟𐏐𐕯𐡗𐤟𐤿𐩐𐩑𐩒𐩓𐩔𐩕𐩖𐩗𐩘𐩿𐫰𐫱𐫲𐫳𐫴𐫵𐫶𐬹𐬺𐬻𐬼𐬽𐬾𐬿𐮙𐮚𐮛𐮜𐽕𐽖𐽗𐽘𐽙𑁇𑁈𑁉𑁊𑁋𑁌𑁍𑂻𑂼𑂾𑂿𑃀𑃁𑅀𑅁𑅂𑅃𑅴𑅵𑇅𑇆𑇇𑇈𑇍𑇛𑇝𑇞𑇟𑈸𑈹𑈺𑈻𑈼𑈽𑊩𑑋𑑌𑑍𑑎𑑏𑑚𑑛𑑝𑓆𑗁𑗂𑗃𑗄𑗅𑗆𑗇𑗈𑗉𑗊𑗋𑗌𑗍𑗎𑗏𑗐𑗑𑗒𑗓𑗔𑗕𑗖𑗗𑙁𑙂𑙃𑙠𑙡𑙢𑙣𑙤𑙥𑙦𑙧𑙨𑙩𑙪𑙫𑙬𑜼𑜽𑜾𑠻𑥄𑥅𑥆𑧢𑨿𑩀𑩁𑩂𑩃𑩄𑩅𑩆𑪚𑪛𑪜𑪞𑪟𑪠𑪡𑪢𑱁𑱂𑱃𑱄𑱅𑱰𑱱𑻷𑻸𑿿𒑰𒑱𒑲𒑳𒑴𖩮𖩯𖫵𖬷𖬸𖬹𖬺𖬻𖭄𖺗𖺘𖺙𖺚𖿢𛲟𝪇𝪈𝪉𝪊𝪋𞥞𞥟"
|
||||
const allowedUcsChar = "¡-ᙿᚁ-\u1fff\u200b-‧\u202a-\u202e‰-⁞\u2060-\u2fff、-\ud7ff豈-\ufdcfﷰ-\uffef𐀀-\U0001fffd𠀀-\U0002fffd𰀀-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e1000-\U000efffd"
|
||||
|
||||
const allowedUcsCharMinusPunc = "¢-¦¨-µ¸-¾À-ͽͿ-ΆΈ-ՙՠ-ֈ֊-ֿׁ-ׂׄ-ׇׅ-ײ\u05f5-؈؋؎-ؚ\u061c-\u061dؠ-٩ٮ-ۓە-ۿ\u070e-߶ߺ-\u082f\u083f-\u085d\u085f-ॣ०-९ॱ-ৼ৾-ੵ\u0a77-૯૱-\u0c76౸-ಃಅ-ෳ\u0df5-๎๐-๙\u0e5c-༃༓༕-྄྆-࿏࿕-࿘\u0fdb-၉ၐ-ჺჼ-፟፩-᙭ᙯ-ᙿᚁ-ᛪᛮ-᜴\u1737-៓ៗ៛-\u17ff᠆᠋-\u1943᥆-\u1a1dᨠ-\u1a9fᪧ\u1aae-᭙᭡-\u1bfbᰀ-\u1c3a᱀-ᱽᲀ-Ჿ\u1cc8-᳔᳒-\u1fff\u200b-―‘-‟\u202a-\u202e‹-›‿-⁀⁄-⁆⁒⁔\u2060-\u2cf8⳽ⴀ-ⵯ\u2d71-ⷿ⸂-⸅⸉-⸊⸌-⸍⸗⸚⸜-⸝⸠-⸩ⸯ⸺-⸻⹀⹂⹐-⹑\u2e53-\u2fff〄-〼〾-ヺー-ꓽꔀ-ꘌꘐ-꙲ꙴ-꙽ꙿ-꛱\ua6f8-ꡳ\ua878-\ua8cd꣐-ꣷꣻꣽ-꤭ꤰ-\ua95eꥠ-꧀\ua9ce-\ua9ddꧠ-\uaa5bꩠ-ꫝꫠ-ꫯꫲ-ꯪ꯬-\ud7ff豈-\ufdcfﷰ-️︗-︘\ufe1a-︯︱-﹄﹇-﹈﹍-﹏\ufe53﹘-﹞﹢-\ufe67﹩\ufe6c-\uff00$(-)+-0-9<->A-[]-⦆「-」ヲ-\uffef𐀀-\U000100ff\U00010103-\U0001039e𐎠-𐏏𐏑-\U0001056e\U00010570-\U00010856𐡘-\U0001091e𐤠-\U0001093e\U00010940-\U00010a4f\U00010a59-𐩾𐪀-𐫯\U00010af7-\U00010b38𐭀-\U00010b98\U00010b9d-𐽔\U00010f5a-𑁆\U0001104e-𑂺\U000110bd\U000110c2-𑄿𑅄-𑅳𑅶-𑇄𑇉-𑇌𑇎-𑇚𑇜\U000111e0-𑈷𑈾-𑊨\U000112aa-𑑊𑑐-𑑙\U0001145c𑑞-𑓅𑓇-𑗀𑗘-𑙀𑙄-\U0001165f\U0001166d-𑜻𑜿-𑠺\U0001183c-𑥃\U00011947-𑧡𑧣-𑨾𑩇-𑪙𑪝\U00011aa3-𑱀\U00011c46-\U00011c6f𑱲-𑻶\U00011ef9-\U00011ffe𒀀-\U0001246f\U00012475-\U00016a6d\U00016a70-𖫴\U00016af6-𖬶𖬼-𖭃𖭅-𖺖\U00016e9b-𖿡𖿣-𛲞\U0001bca0-𝪆\U0001da8c-\U0001e95d\U0001e960-\U0001fffd𠀀-\U0002fffd𰀀-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e1000-\U000efffd"
|
||||
|
|
|
|||
138
vendor/mvdan.cc/xurls/v2/xurls.go
vendored
138
vendor/mvdan.cc/xurls/v2/xurls.go
vendored
|
|
@ -7,6 +7,7 @@ package xurls
|
|||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
|
|
@ -15,32 +16,71 @@ import (
|
|||
//go:generate go run ./generate/unicodegen
|
||||
|
||||
const (
|
||||
// pathCont is based on https://www.rfc-editor.org/rfc/rfc3987#section-2.2
|
||||
// but does not match separators anywhere or most punctuation in final position,
|
||||
// to avoid creating asymmetries like
|
||||
// `Did you know that **<a href="...">https://example.com/**</a> is reserved for documentation?`
|
||||
// from `Did you know that **https://example.com/** is reserved for documentation?`.
|
||||
unreservedChar = `a-zA-Z0-9\-._~`
|
||||
endUnreservedChar = `a-zA-Z0-9\-_~`
|
||||
midSubDelimChar = `!$&'*+,;=`
|
||||
endSubDelimChar = `$&+=`
|
||||
midIPathSegmentChar = unreservedChar + `%` + midSubDelimChar + `:@` + allowedUcsChar
|
||||
endIPathSegmentChar = endUnreservedChar + `%` + endSubDelimChar + allowedUcsCharMinusPunc
|
||||
iPrivateChar = `\x{E000}-\x{F8FF}\x{F0000}-\x{FFFFD}\x{100000}-\x{10FFFD}`
|
||||
midIChar = `/?#\\` + midIPathSegmentChar + iPrivateChar
|
||||
endIChar = `/#` + endIPathSegmentChar + iPrivateChar
|
||||
wellParen = `\((?:[` + midIChar + `]|\([` + midIChar + `]*\))*\)`
|
||||
wellBrack = `\[(?:[` + midIChar + `]|\[[` + midIChar + `]*\])*\]`
|
||||
wellBrace = `\{(?:[` + midIChar + `]|\{[` + midIChar + `]*\})*\}`
|
||||
wellAll = wellParen + `|` + wellBrack + `|` + wellBrace
|
||||
pathCont = `(?:[` + midIChar + `]*(?:` + wellAll + `|[` + endIChar + `]))+`
|
||||
|
||||
letter = `\p{L}`
|
||||
mark = `\p{M}`
|
||||
number = `\p{N}`
|
||||
iriChar = letter + mark + number
|
||||
currency = `\p{Sc}`
|
||||
otherSymb = `\p{So}`
|
||||
endChar = iriChar + `/\-_+&~%=#` + currency + otherSymb
|
||||
midChar = endChar + "_*" + otherPuncMinusDoubleQuote
|
||||
wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)`
|
||||
wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]`
|
||||
wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}`
|
||||
wellAll = wellParen + `|` + wellBrack + `|` + wellBrace
|
||||
pathCont = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+`
|
||||
iri = `[` + iriChar + `](?:[` + iriChar + `\-]*[` + iriChar + `])?`
|
||||
subdomain = `(?:` + iri + `\.)+`
|
||||
octet = `(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
|
||||
ipv4Addr = octet + `\.` + octet + `\.` + octet + `\.` + octet
|
||||
|
||||
iri = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?`
|
||||
domain = `(` + iri + `\.)+`
|
||||
octet = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
|
||||
ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b`
|
||||
ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:`
|
||||
ipAddr = `(` + ipv4Addr + `|` + ipv6Addr + `)`
|
||||
port = `(:[0-9]*)?`
|
||||
// ipv6Addr is based on https://datatracker.ietf.org/doc/html/rfc4291#section-2.2
|
||||
// with a specific alternative for each valid count of leading 16-bit hexadecimal "chomps"
|
||||
// that have not been replaced with a `::` elision.
|
||||
h4 = `[0-9a-fA-F]{1,4}`
|
||||
ipv6AddrMinusEmpty = `(?:` +
|
||||
// 7 colon-terminated chomps, followed by a final chomp or the rest of an elision.
|
||||
`(?:` + h4 + `:){7}(?:` + h4 + `|:)|` +
|
||||
// 6 chomps, followed by an IPv4 address or elision with final chomp or final elision.
|
||||
`(?:` + h4 + `:){6}(?:` + ipv4Addr + `|:` + h4 + `|:)|` +
|
||||
// 5 chomps, followed by an elision with optional IPv4 or up to 2 final chomps.
|
||||
`(?:` + h4 + `:){5}(?::` + ipv4Addr + `|(?::` + h4 + `){1,2}|:)|` +
|
||||
// 4 chomps, followed by an elision with optional IPv4 (optionally preceded by a chomp) or
|
||||
// up to 3 final chomps.
|
||||
`(?:` + h4 + `:){4}(?:(?::` + h4 + `){0,1}:` + ipv4Addr + `|(?::` + h4 + `){1,3}|:)|` +
|
||||
// 3 chomps, followed by an elision with optional IPv4 (preceded by up to 2 chomps) or
|
||||
// up to 4 final chomps.
|
||||
`(?:` + h4 + `:){3}(?:(?::` + h4 + `){0,2}:` + ipv4Addr + `|(?::` + h4 + `){1,4}|:)|` +
|
||||
// 2 chomps, followed by an elision with optional IPv4 (preceded by up to 3 chomps) or
|
||||
// up to 5 final chomps.
|
||||
`(?:` + h4 + `:){2}(?:(?::` + h4 + `){0,3}:` + ipv4Addr + `|(?::` + h4 + `){1,5}|:)|` +
|
||||
// 1 chomp, followed by an elision with optional IPv4 (preceded by up to 4 chomps) or
|
||||
// up to 6 final chomps.
|
||||
`(?:` + h4 + `:){1}(?:(?::` + h4 + `){0,4}:` + ipv4Addr + `|(?::` + h4 + `){1,6}|:)|` +
|
||||
// elision, followed by optional IPv4 (preceded by up to 5 chomps) or
|
||||
// up to 7 final chomps.
|
||||
// `:` is an intentionally omitted alternative, to avoid matching `::`.
|
||||
`:(?:(?::` + h4 + `){0,5}:` + ipv4Addr + `|(?::` + h4 + `){1,7})` +
|
||||
`)`
|
||||
ipv6Addr = `(?:` + ipv6AddrMinusEmpty + `|::)`
|
||||
ipAddrMinusEmpty = `(?:` + ipv6AddrMinusEmpty + `|\b` + ipv4Addr + `\b)`
|
||||
port = `(?::[0-9]*)?`
|
||||
)
|
||||
|
||||
// AnyScheme can be passed to StrictMatchingScheme to match any possibly valid
|
||||
// scheme, and not just the known ones.
|
||||
var AnyScheme = `([a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)`
|
||||
var AnyScheme = `(?:[a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)`
|
||||
|
||||
// SchemesNoAuthority is a sorted list of some well-known url schemes that are
|
||||
// followed by ":" instead of "://". The list includes both officially
|
||||
|
|
@ -62,17 +102,33 @@ var SchemesNoAuthority = []string{
|
|||
//
|
||||
// Mostly collected from https://en.wikipedia.org/wiki/List_of_URI_schemes#Unofficial_but_common_URI_schemes.
|
||||
var SchemesUnofficial = []string{
|
||||
`jdbc`, // Java database Connectivity
|
||||
`postgres`, // PostgreSQL (short form)
|
||||
`postgresql`, // PostgreSQL
|
||||
`slack`, // Slack
|
||||
`zoommtg`, // Zoom (desktop)
|
||||
`zoomus`, // Zoom (mobile)
|
||||
`gemini`, // gemini
|
||||
`jdbc`, // Java Database Connectivity
|
||||
`moz-extension`, // Firefox extension
|
||||
`postgres`, // PostgreSQL (short form)
|
||||
`postgresql`, // PostgreSQL
|
||||
`slack`, // Slack
|
||||
`zoommtg`, // Zoom (desktop)
|
||||
`zoomus`, // Zoom (mobile)
|
||||
}
|
||||
|
||||
// The regular expressions are compiled when the API is first called.
|
||||
// Any subsequent calls will use the same regular expression pointers.
|
||||
//
|
||||
// We do not need to make a copy of them for each API call,
|
||||
// as Copy is now only useful if one copy calls Longest but not another,
|
||||
// and we always call Longest after compiling the regular expression.
|
||||
var (
|
||||
strictRe *regexp.Regexp
|
||||
strictInit sync.Once
|
||||
|
||||
relaxedRe *regexp.Regexp
|
||||
relaxedInit sync.Once
|
||||
)
|
||||
|
||||
func anyOf(strs ...string) string {
|
||||
var b strings.Builder
|
||||
b.WriteByte('(')
|
||||
b.WriteString("(?:")
|
||||
for i, s := range strs {
|
||||
if i != 0 {
|
||||
b.WriteByte('|')
|
||||
|
|
@ -84,8 +140,8 @@ func anyOf(strs ...string) string {
|
|||
}
|
||||
|
||||
func strictExp() string {
|
||||
schemes := `((` + anyOf(Schemes...) + `|` + anyOf(SchemesUnofficial...) + `)://|` + anyOf(SchemesNoAuthority...) + `:)`
|
||||
return `(?i)` + schemes + `(?-i)` + pathCont
|
||||
schemes := `(?:(?i)(?:` + anyOf(Schemes...) + `|` + anyOf(SchemesUnofficial...) + `)://|` + anyOf(SchemesNoAuthority...) + `:)`
|
||||
return schemes + pathCont
|
||||
}
|
||||
|
||||
func relaxedExp() string {
|
||||
|
|
@ -102,35 +158,39 @@ func relaxedExp() string {
|
|||
// Use \b to make sure ASCII TLDs are immediately followed by a word break.
|
||||
// We can't do that with unicode TLDs, as they don't see following
|
||||
// whitespace as a word break.
|
||||
tlds := `(?i)(` + punycode + `|` + anyOf(append(asciiTLDs, PseudoTLDs...)...) + `\b|` + anyOf(unicodeTLDs...) + `)(?-i)`
|
||||
site := domain + tlds
|
||||
tlds := `(?:(?i)` + punycode + `|` + anyOf(append(asciiTLDs, PseudoTLDs...)...) + `\b|` + anyOf(unicodeTLDs...) + `)`
|
||||
domain := subdomain + tlds
|
||||
|
||||
hostName := `(` + site + `|` + ipAddr + `)`
|
||||
webURL := hostName + port + `(/|/` + pathCont + `)?`
|
||||
email := `[a-zA-Z0-9._%\-+]+@` + site
|
||||
return strictExp() + `|` + webURL + `|` + email
|
||||
hostName := `(?:` + domain + `|\[` + ipv6Addr + `\]|\b` + ipv4Addr + `\b)`
|
||||
webURL := hostName + port + `(?:/` + pathCont + `|/)?`
|
||||
email := `[a-zA-Z0-9._%\-+]+@` + domain
|
||||
return strictExp() + `|` + webURL + `|` + email + `|` + ipv6AddrMinusEmpty
|
||||
}
|
||||
|
||||
// Strict produces a regexp that matches any URL with a scheme in either the
|
||||
// Schemes or SchemesNoAuthority lists.
|
||||
func Strict() *regexp.Regexp {
|
||||
re := regexp.MustCompile(strictExp())
|
||||
re.Longest()
|
||||
return re
|
||||
strictInit.Do(func() {
|
||||
strictRe = regexp.MustCompile(strictExp())
|
||||
strictRe.Longest()
|
||||
})
|
||||
return strictRe
|
||||
}
|
||||
|
||||
// Relaxed produces a regexp that matches any URL matched by Strict, plus any
|
||||
// URL with no scheme or email address.
|
||||
func Relaxed() *regexp.Regexp {
|
||||
re := regexp.MustCompile(relaxedExp())
|
||||
re.Longest()
|
||||
return re
|
||||
relaxedInit.Do(func() {
|
||||
relaxedRe = regexp.MustCompile(relaxedExp())
|
||||
relaxedRe.Longest()
|
||||
})
|
||||
return relaxedRe
|
||||
}
|
||||
|
||||
// StrictMatchingScheme produces a regexp similar to Strict, but requiring that
|
||||
// the scheme match the given regular expression. See AnyScheme too.
|
||||
func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
|
||||
strictMatching := `(?i)(` + exp + `)(?-i)` + pathCont
|
||||
strictMatching := `(?i)(?:` + exp + `)(?-i)` + pathCont
|
||||
re, err := regexp.Compile(strictMatching)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue