mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-11-02 20:02:25 -06:00 
			
		
		
		
	
		
			
				
	
	
		
			386 lines
		
	
	
		
			No EOL
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Ragel
		
	
	
	
	
	
			
		
		
	
	
			386 lines
		
	
	
		
			No EOL
		
	
	
		
			9.5 KiB
		
	
	
	
		
			Ragel
		
	
	
	
	
	
package urn
 | 
						|
 | 
						|
import (
 | 
						|
    "fmt"
 | 
						|
 | 
						|
    scimschema "github.com/leodido/go-urn/scim/schema"
 | 
						|
)
 | 
						|
 | 
						|
// Error message templates used by the parser's error actions.
//
// Every template takes exactly one %d argument: the position in the input
// at which the error was detected.
var (
    errPrefix              = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]"
    errIdentifier          = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its beginning) [col %d]"
    errSpecificString      = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]"
    errNoUrnWithinID       = "expecting the identifier to not contain the \"urn\" reserved string [col %d]"
    errHex                 = "expecting the percent encoded chars to be well-formed (%%alnum{2}) [col %d]"
    errSCIMNamespace       = "expecting the SCIM namespace identifier (ietf:params:scim) [col %d]"
    errSCIMType            = "expecting a correct SCIM type (schemas, api, param) [col %d]"
    errSCIMName            = "expecting one or more alnum char in the SCIM name part [col %d]"
    errSCIMOther           = "expecting a well-formed other SCIM part [col %d]"
    errSCIMOtherIncomplete = "expecting a not empty SCIM other part after colon [col %d]"
    err8141InformalID      = "informal URN namespace must be in the form urn-[1-9][0-9] [col %d]"
    err8141SpecificString  = "expecting the specific string to contain alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] not in first position) chars [col %d]"
    err8141Identifier      = "expecting the identifier to be a string with (length 2 to 32 chars) containing alnum (or dashes) not starting or ending with a dash [col %d]"
    err8141RComponentStart = "expecting only one r-component (starting with the ?+ sequence) [col %d]"
    err8141QComponentStart = "expecting only one q-component (starting with the ?= sequence) [col %d]"
    err8141MalformedRComp  = "expecting a non-empty r-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]"
    err8141MalformedQComp  = "expecting a non-empty q-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]"
)
 | 
						|
 | 
						|
%%{
 | 
						|
machine urn;
 | 
						|
 | 
						|
# unsigned alphabet
 | 
						|
alphtype uint8;
 | 
						|
 | 
						|
action mark {
    // Remember the current position as the start of the span returned by m.text().
 | 
						|
    m.pb = m.p
 | 
						|
}
 | 
						|
 | 
						|
action tolower {
 | 
						|
    // Record the offset of the current byte, relative to the mark, so that
    // set_nss can lowercase it when it builds the normalized string.
 | 
						|
    output.tolower = append(output.tolower, m.p - m.pb)
 | 
						|
}
 | 
						|
 | 
						|
action set_pre {
    // Store the marked text as the URN prefix.
 | 
						|
    output.prefix = string(m.text())
 | 
						|
}
 | 
						|
 | 
						|
action throw_pre_urn_err {
    // Entering action of the prefix machine: rejects a second "urn" appearing
    // as the namespace identifier. Skipped entirely in RFC8141Only mode.
 | 
						|
    if m.parsingMode != RFC8141Only {
 | 
						|
        // Throw an error when:
 | 
						|
        // - we are entering here matching the prefix in the namespace identifier part
 | 
						|
        // - looking ahead (3 chars) we find a colon
 | 
						|
        // 58 is the ASCII code of ':'; a non-empty output.prefix means the real prefix was already stored.
        if pos := m.p + 3; pos < m.pe && m.data[pos] == 58 && output.prefix != "" {
 | 
						|
            m.err = fmt.Errorf(errNoUrnWithinID, pos)
 | 
						|
            fhold;
 | 
						|
            fgoto fail;
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
action set_nid {
    // Store the marked text as the namespace identifier.
 | 
						|
    output.ID = string(m.text())
 | 
						|
}
 | 
						|
 | 
						|
action set_nss {
    // Store the marked text as the specific string (SS), then build the
    // normalized form (norm) with the bytes recorded by tolower lowercased.
 | 
						|
    output.SS = string(m.text())
 | 
						|
    // Iterate upper letters lowering them
    // (done in place on the input buffer; +32 maps ASCII 'A'-'Z' to 'a'-'z')
 | 
						|
    for _, i := range output.tolower {
 | 
						|
        m.data[m.pb+i] = m.data[m.pb+i] + 32
 | 
						|
    }
 | 
						|
    output.norm = string(m.text())
 | 
						|
    // Revert the buffer to the original
 | 
						|
    for _, i := range output.tolower {
 | 
						|
        m.data[m.pb+i] = m.data[m.pb+i] - 32
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
action err_pre {
    // Report a malformed prefix at the current position, hold the current
    // byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(errPrefix, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_nid {
    // Report a malformed namespace identifier at the current position, hold
    // the current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(errIdentifier, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_nss {
    // Report a malformed specific string at the current position, hold the
    // current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(errSpecificString, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_urn {
    // Report that the namespace identifier is the reserved "urn" string,
    // hold the current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(errNoUrnWithinID, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_hex {
    // Report a malformed percent-encoded sequence, but only in the
    // RFC2141Only and RFC8141Only modes; in any other mode this action
    // does nothing.
 | 
						|
    if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only {
 | 
						|
        m.err = fmt.Errorf(errHex, m.p)
 | 
						|
        fhold;
 | 
						|
        fgoto fail;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
action base_type {
    // Tag the result as an RFC 2141 URN.
 | 
						|
    output.kind = RFC2141;
 | 
						|
}
 | 
						|
 | 
						|
# pre: the "urn" prefix in any letter case. Entering it sets the mark and runs
# throw_pre_urn_err; leaving it stores the prefix; a mismatch raises err_pre.
pre = ([uU] @err(err_pre) [rR] @err(err_pre) [nN] @err(err_pre)) >mark >throw_pre_urn_err %set_pre;
 | 
						|
 | 
						|
# nid: one alphanumeric (marked) followed by up to 31 alphanumerics or dashes.
# Stored with set_nid on leaving; any error raises err_nid.
nid = (alnum >mark (alnum | '-'){0,31}) $err(err_nid) %set_nid;
 | 
						|
 | 
						|
# hex: '%' followed by exactly two characters, each a digit or a letter (any
# letter, not only a-f). Uppercase letters are registered via tolower.
hex = '%' (digit | lower | upper >tolower){2} $err(err_hex);
 | 
						|
 | 
						|
# sss: a single literal character allowed in the specific string:
# an alphanumeric or one of ( ) + , - . : = @ ; $ _ ! * '
sss = (alnum | [()+,\-.:=@;$_!*']);
 | 
						|
 | 
						|
# nss: the namespace specific string, one or more sss characters or hex
# sequences; any error raises err_nss.
nss = (sss | hex)+ $err(err_nss);
 | 
						|
 | 
						|
# nid_not_urn: every nid except the strings matched by pre (the reserved "urn"),
# with err_urn attached as the error action of pre's final states.
nid_not_urn = (nid - pre %err(err_urn));
 | 
						|
 | 
						|
# urn: the RFC 2141 machine: prefix ':' identifier ':' specific string. The
# specific string is marked on entry and stored by set_nss on leaving;
# base_type tags the result when the input ends in a final state.
urn = pre ':' @err(err_pre) (nid_not_urn ':' nss >mark %set_nss) %eof(base_type);
 | 
						|
 | 
						|
### SCIM BEG
 | 
						|
 | 
						|
action err_scim_nid {
    // Report a wrong SCIM namespace identifier at the current position, hold
    // the current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(errSCIMNamespace, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_scim_type {
    // Report an unknown SCIM type at the current position, hold the current
    // byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(errSCIMType, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_scim_name {
    // Report a malformed SCIM name at the current position, hold the current
    // byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(errSCIMName, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_scim_other {
    // Report a bad SCIM "other" part. When the input ended (m.p == m.pe) the
    // part is reported as incomplete at the previous position; otherwise it
    // is reported as malformed at the current position.
 | 
						|
    if m.p == m.pe {
 | 
						|
        m.err = fmt.Errorf(errSCIMOtherIncomplete, m.p-1)
 | 
						|
    } else {
 | 
						|
        m.err = fmt.Errorf(errSCIMOther, m.p)
 | 
						|
    }
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action scim_type {
    // Tag the result as an RFC 7643 (SCIM) URN.
 | 
						|
    output.kind = RFC7643;
 | 
						|
}
 | 
						|
 | 
						|
action create_scim {
    // Allocate the SCIM details that the following SCIM actions fill in.
 | 
						|
    output.scim = &SCIM{};
 | 
						|
}
 | 
						|
 | 
						|
action set_scim_type {
    // Convert the marked text into a SCIM type and store it.
 | 
						|
    output.scim.Type = scimschema.TypeFromString(string(m.text()))
 | 
						|
}
 | 
						|
 | 
						|
action mark_scim_name {
    // Remember where the SCIM name starts; kept separately from m.pb.
 | 
						|
    output.scim.pos = m.p
 | 
						|
}
 | 
						|
 | 
						|
action set_scim_name {
    // Store the bytes from the position saved by mark_scim_name up to the
    // current position as the SCIM name.
 | 
						|
    output.scim.Name = string(m.data[output.scim.pos:m.p])
 | 
						|
}
 | 
						|
 | 
						|
action mark_scim_other {
    // Remember where the SCIM "other" part starts (reuses output.scim.pos).
 | 
						|
    output.scim.pos = m.p
 | 
						|
}
 | 
						|
 | 
						|
action set_scim_other {
    // Store the bytes from the position saved by mark_scim_other up to the
    // current position as the SCIM "other" part.
 | 
						|
    output.scim.Other = string(m.data[output.scim.pos:m.p])
 | 
						|
}
 | 
						|
 | 
						|
# scim_nid: the literal SCIM namespace identifier. It is marked on entry; on
# leaving it is stored with set_nid and the SCIM details are allocated.
scim_nid = 'ietf:params:scim' >mark %set_nid %create_scim $err(err_scim_nid);
 | 
						|
 | 
						|
# scim_other: ':' followed by one or more sss characters or hex sequences; the
# part after the colon is recorded as the SCIM "other" value.
scim_other = ':' (sss | hex)+ >mark_scim_other %set_scim_other $err(err_scim_other);
 | 
						|
 | 
						|
# scim_name: one or more alphanumerics, recorded as the SCIM name.
scim_name = (alnum)+ >mark_scim_name %set_scim_name $err(err_scim_name);
 | 
						|
 | 
						|
# scim_type: one of the three SCIM type keywords; marked on entry and
# converted by set_scim_type on leaving.
scim_type = ('schemas' | 'api' | 'param') >mark %set_scim_type $err(err_scim_type);
 | 
						|
 | 
						|
# scim_only: entry point for SCIM URNs (used as the start state in RFC7643Only
# mode): prefix ':' SCIM namespace ':' type ':' name and an optional other part.
scim_only := pre ':' @err(err_pre) (scim_nid ':' scim_type ':' scim_name scim_other? %set_nss) %eof(scim_type);
 | 
						|
 | 
						|
### SCIM END
 | 
						|
 | 
						|
### 8141 BEG
 | 
						|
 | 
						|
action err_nss_8141 {
    // Report a malformed RFC 8141 specific string at the current position,
    // hold the current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(err8141SpecificString, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_nid_8141 {
    // Report a malformed RFC 8141 namespace identifier at the current
    // position, hold the current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(err8141Identifier, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action rfc8141_type {
    // Tag the result as an RFC 8141 URN.
 | 
						|
    output.kind = RFC8141;
 | 
						|
}
 | 
						|
 | 
						|
action set_r_component {
    // Store the marked text as the r-component.
 | 
						|
    output.rComponent = string(m.text())
 | 
						|
}
 | 
						|
 | 
						|
action set_q_component {
    // Store the marked text as the q-component.
 | 
						|
    output.qComponent = string(m.text())
 | 
						|
}
 | 
						|
 | 
						|
action set_f_component {
    // Store the marked text as the f-component.
 | 
						|
    output.fComponent = string(m.text())
 | 
						|
}
 | 
						|
 | 
						|
action informal_nid_match {
    // Report an invalid informal namespace identifier and continue in the
    // fail machine. Unlike the sibling error actions, fhold runs before the
    // message is formatted, so m.p is read after the hold.
 | 
						|
    fhold;
 | 
						|
    m.err = fmt.Errorf(err8141InformalID, m.p);
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action mark_r_start {
    // Note that an r-component introducer was seen. A second one is an
    // error: report it and continue in the fail machine.
 | 
						|
    if output.rStart {
 | 
						|
        m.err = fmt.Errorf(err8141RComponentStart, m.p)
 | 
						|
        fhold;
 | 
						|
        fgoto fail;
 | 
						|
    }
 | 
						|
    output.rStart = true
 | 
						|
}
 | 
						|
 | 
						|
action mark_q_start {
    // Note that a q-component introducer was seen. A second one is an
    // error: report it and continue in the fail machine.
 | 
						|
    if output.qStart {
 | 
						|
        m.err = fmt.Errorf(err8141QComponentStart, m.p)
 | 
						|
        fhold;
 | 
						|
        fgoto fail;
 | 
						|
    }
 | 
						|
    output.qStart = true
 | 
						|
}
 | 
						|
 | 
						|
action err_malformed_r_component {
    // Report a malformed r-component at the current position, hold the
    // current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(err8141MalformedRComp, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
action err_malformed_q_component {
    // Report a malformed q-component at the current position, hold the
    // current byte (fhold) and continue in the fail machine.
 | 
						|
    m.err = fmt.Errorf(err8141MalformedQComp, m.p)
 | 
						|
    fhold;
 | 
						|
    fgoto fail;
 | 
						|
}
 | 
						|
 | 
						|
# pchar: a single RFC 8141 character unit: an sss character, '~', '&', or a
# hex sequence.
pchar = (sss | '~' | '&' | hex);
 | 
						|
 | 
						|
# component: a pchar followed by any number of pchar, '/' or '?', so '/' and
# '?' cannot appear first.
component = pchar (pchar | '/' | '?')*;
 | 
						|
 | 
						|
# r_start: the "?+" sequence that introduces an r-component; leaving it runs
# mark_r_start.
r_start = ('?+') %mark_r_start;
 | 
						|
 | 
						|
# r_component: "?+" followed by the r-component body. The body is marked on
# entry and stored by set_r_component on leaving; a further "?+" inside the
# body goes through mark_r_start again, which rejects it.
r_component = r_start <: (r_start | component)+ $err(err_malformed_r_component) >mark %set_r_component;
 | 
						|
 | 
						|
# q_start: the "?=" sequence that introduces a q-component; leaving it runs
# mark_q_start.
q_start = ('?=') %mark_q_start;
 | 
						|
 | 
						|
# q_component: "?=" followed by the q-component body. The body is marked on
# entry and stored by set_q_component on leaving; a further "?=" inside the
# body goes through mark_q_start again, which rejects it.
q_component = q_start <: (q_start | component)+ $err(err_malformed_q_component) >mark %set_q_component;
 | 
						|
 | 
						|
# rq_components: an r-component optionally followed by a q-component, or a
# q-component on its own.
rq_components = (r_component :>> q_component? | q_component);
 | 
						|
 | 
						|
# fragment: zero or more pchar, '/' or '?' (may be empty).
fragment = (pchar | '/' | '?')*;
 | 
						|
 | 
						|
# f_component: '#' followed by a fragment; the fragment is marked on entry and
# stored by set_f_component on leaving.
f_component = '#' fragment >mark %set_f_component;
 | 
						|
 | 
						|
# nss_rfc8141: the RFC 8141 specific string: a pchar (marked) followed by any
# number of pchar or '/'. Stored by set_nss on leaving; errors raise err_nss_8141.
nss_rfc8141 = (pchar >mark (pchar | '/')*) $err(err_nss_8141) %set_nss;
 | 
						|
 | 
						|
# nid_rfc8141: the RFC 8141 namespace identifier: 2 to 32 characters, starting
# and ending with an alphanumeric, with alphanumerics or dashes in between.
nid_rfc8141 = (alnum >mark (alnum | '-'){0,30} alnum) $err(err_nid_8141) %set_nid;
 | 
						|
 | 
						|
# informal_id: the "urn" prefix followed by '-' and a character that cannot
# start the number of an informal namespace (a letter or '0'); reaching it
# runs informal_nid_match. The class previously read [a-zA-z0], whose A-z
# range also covered the punctuation between 'Z' and 'a'.
informal_id = pre ('-' [a-zA-Z0] %to(informal_nid_match));
 | 
						|
 | 
						|
# nid_rfc8141_not_urn: every nid_rfc8141 except the strings matched by the
# optional informal_id machine.
nid_rfc8141_not_urn = (nid_rfc8141 - informal_id?);
 | 
						|
 | 
						|
# rfc8141_only: entry point for RFC 8141 URNs (used as the start state in
# RFC8141Only mode): prefix ':' identifier ':' specific string, then optional
# r/q components and an optional f-component.
rfc8141_only := pre ':' @err(err_pre) nid_rfc8141_not_urn ':' nss_rfc8141 rq_components? f_component? %eof(rfc8141_type);
 | 
						|
 | 
						|
### 8141 END
 | 
						|
 | 
						|
# fail: the machine every error action jumps to. It consumes any byte except
# CR and LF and, on error, jumps back to main. Parse treats ending in this
# machine's entry state (en_fail) as a failure.
fail := (any - [\n\r])* @err{ fgoto main; };
 | 
						|
 | 
						|
# main: the default entry point (used as the start state in RFC2141Only mode).
main := urn;
 | 
						|
 | 
						|
}%%
 | 
						|
 | 
						|
%% write data noerror noprefix;
 | 
						|
 | 
						|
// Machine is the interface representing the FSM
 | 
						|
type Machine interface {
 | 
						|
    Error() error
 | 
						|
    Parse(input []byte) (*URN, error)
 | 
						|
    WithParsingMode(ParsingMode)
 | 
						|
}
 | 
						|
 | 
						|
type machine struct {
 | 
						|
    data              []byte
 | 
						|
    cs                int
 | 
						|
    p, pe, eof, pb    int
 | 
						|
    err               error
 | 
						|
    startParsingAt    int
 | 
						|
    parsingMode       ParsingMode
 | 
						|
    parsingModeSet    bool
 | 
						|
}
 | 
						|
 | 
						|
// NewMachine creates a new FSM for parsing URNs.
//
// The options are applied in the order given. If the parsing mode is still
// unset afterwards, DefaultParsingMode is used.
func NewMachine(options ...Option) Machine {
    m := new(machine)

    for i := range options {
        options[i](m)
    }

    if !m.parsingModeSet {
        // No option picked a mode: fall back to the package default.
        m.WithParsingMode(DefaultParsingMode)
    }

    // Ragel directives: make the generated code reach its variables through m.
    %% access m.;
    %% variable p m.p;
    %% variable pe m.pe;
    %% variable eof m.eof;
    %% variable data m.data;

    return m
}
 | 
						|
 | 
						|
// Err returns the error that occurred on the last call to Parse.
 | 
						|
//
 | 
						|
// If the result is nil, then the line was parsed successfully.
 | 
						|
func (m *machine) Error() error {
 | 
						|
    return m.err
 | 
						|
}
 | 
						|
 | 
						|
func (m *machine) text() []byte {
 | 
						|
    return m.data[m.pb:m.p]
 | 
						|
}
 | 
						|
 | 
						|
// Parse runs the state machine over input, starting from the entry state
// chosen by the parsing mode (RFC 2141, RFC 7643 SCIM or RFC 8141).
//
// It returns the parsed URN. When the machine stops outside a final state,
// or in the entry state of the fail machine, it returns nil together with
// the recorded error.
func (m *machine) Parse(input []byte) (*URN, error) {
    // Reset the per-call state before running the generated code.
    m.data, m.err = input, nil
    m.p, m.pb = 0, 0
    m.pe, m.eof = len(input), len(input)
    m.cs = m.startParsingAt

    // The actions refer to this variable by name; it must stay "output".
    output := &URN{
        tolower: []int{},
    }

    %% write exec;

    if m.cs >= first_final && m.cs != en_fail {
        return output, nil
    }

    return nil, m.err
}
 | 
						|
 | 
						|
func (m *machine) WithParsingMode(x ParsingMode) {
 | 
						|
    m.parsingMode = x
 | 
						|
    switch m.parsingMode {
 | 
						|
        case RFC2141Only:
 | 
						|
            m.startParsingAt = en_main
 | 
						|
        case RFC8141Only:
 | 
						|
            m.startParsingAt = en_rfc8141_only
 | 
						|
        case RFC7643Only:
 | 
						|
            m.startParsingAt = en_scim_only
 | 
						|
    }
 | 
						|
    m.parsingModeSet = true
 | 
						|
} |