mirror of
				https://github.com/superseriousbusiness/gotosocial.git
				synced 2025-10-31 06:52:26 -05:00 
			
		
		
		
	* add back exif-terminator and use only for jpeg,png,webp * fix arguments passed to terminateExif() * pull in latest exif-terminator * fix test * update processed img --------- Co-authored-by: tobi <tobi.smethurst@protonmail.com>
		
			
				
	
	
		
			175 lines
		
	
	
	
		
			5.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			175 lines
		
	
	
	
		
			5.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2020 Google Inc. All rights reserved.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //     http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package s2
 | |
| 
 | |
| import (
 | |
| 	"encoding/binary"
 | |
| 	"hash/adler32"
 | |
| 	"math"
 | |
| 	"sort"
 | |
| )
 | |
| 
 | |
| // TODO(roberts): If any of these are worth making public, change the
 | |
| // method signatures and type names.
 | |
| 
 | |
// emptySetID is the ID used for the empty set. It is the smallest int32:
// non-negative IDs are reserved for singleton sets, and this value is the
// bitwise complement of the largest possible int32, i.e. the very last
// negative ID that could ever be generated.
var emptySetID int32 = math.MinInt32
 | |
| 
 | |
| // idSetLexicon compactly represents a set of non-negative
 | |
| // integers such as array indices ("ID sets"). It is especially suitable when
 | |
| // either (1) there are many duplicate sets, or (2) there are many singleton
 | |
| // or empty sets. See also sequenceLexicon.
 | |
| //
 | |
| // Each distinct ID set is mapped to a 32-bit integer. Empty and singleton
 | |
| // sets take up no additional space; the set itself is represented
 | |
| // by the unique ID assigned to the set. Duplicate sets are automatically
 | |
| // eliminated. Note also that ID sets are referred to using 32-bit integers
 | |
| // rather than pointers.
 | |
| type idSetLexicon struct {
 | |
| 	idSets *sequenceLexicon
 | |
| }
 | |
| 
 | |
| func newIDSetLexicon() *idSetLexicon {
 | |
| 	return &idSetLexicon{
 | |
| 		idSets: newSequenceLexicon(),
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // add adds the given set of integers to the lexicon if it is not already
 | |
| // present, and return the unique ID for this set. The values are automatically
 | |
| // sorted and duplicates are removed.
 | |
| //
 | |
| // The primary difference between this and sequenceLexicon are:
 | |
| // 1. Empty and singleton sets are represented implicitly; they use no space.
 | |
| // 2. Sets are represented rather than sequences; the ordering of values is
 | |
| //    not important and duplicates are removed.
 | |
| // 3. The values must be 32-bit non-negative integers only.
 | |
| func (l *idSetLexicon) add(ids ...int32) int32 {
 | |
| 	// Empty sets have a special ID chosen not to conflict with other IDs.
 | |
| 	if len(ids) == 0 {
 | |
| 		return emptySetID
 | |
| 	}
 | |
| 
 | |
| 	// Singleton sets are represented by their element.
 | |
| 	if len(ids) == 1 {
 | |
| 		return ids[0]
 | |
| 	}
 | |
| 
 | |
| 	// Canonicalize the set by sorting and removing duplicates.
 | |
| 	//
 | |
| 	// Creates a new slice in order to not alter the supplied values.
 | |
| 	set := uniqueInt32s(ids)
 | |
| 
 | |
| 	// Non-singleton sets are represented by the bitwise complement of the ID
 | |
| 	// returned by the sequenceLexicon
 | |
| 	return ^l.idSets.add(set)
 | |
| }
 | |
| 
 | |
| // idSet returns the set of integers corresponding to an ID returned by add.
 | |
| func (l *idSetLexicon) idSet(setID int32) []int32 {
 | |
| 	if setID >= 0 {
 | |
| 		return []int32{setID}
 | |
| 	}
 | |
| 	if setID == emptySetID {
 | |
| 		return []int32{}
 | |
| 	}
 | |
| 
 | |
| 	return l.idSets.sequence(^setID)
 | |
| }
 | |
| 
 | |
| func (l *idSetLexicon) clear() {
 | |
| 	l.idSets.clear()
 | |
| }
 | |
| 
 | |
// sequenceLexicon compactly stores sequences of values (e.g., tuples). Adding
// a sequence that is already present does not store it again, and every
// stored sequence is identified by a 32-bit integer ID handed out in
// increasing order starting from zero. See also idSetLexicon.
type sequenceLexicon struct {
	// values is the concatenation of all stored sequences, in ID order.
	values []int32
	// begins[id] is the offset in values at which sequence id starts and
	// begins[id+1] is where it ends, so begins always has one more entry than
	// there are sequences.
	begins []uint32

	// idSet maps a key derived from a sequence's hash to that sequence's ID.
	idSet map[uint32]int32
}
 | |
| 
 | |
| func newSequenceLexicon() *sequenceLexicon {
 | |
| 	return &sequenceLexicon{
 | |
| 		begins: []uint32{0},
 | |
| 		idSet:  make(map[uint32]int32),
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // clears all data from the lexicon.
 | |
| func (l *sequenceLexicon) clear() {
 | |
| 	l.values = nil
 | |
| 	l.begins = []uint32{0}
 | |
| 	l.idSet = make(map[uint32]int32)
 | |
| }
 | |
| 
 | |
| // add adds the given value to the lexicon if it is not already present, and
 | |
| // returns its ID. IDs are assigned sequentially starting from zero.
 | |
| func (l *sequenceLexicon) add(ids []int32) int32 {
 | |
| 	if id, ok := l.idSet[hashSet(ids)]; ok {
 | |
| 		return id
 | |
| 	}
 | |
| 	for _, v := range ids {
 | |
| 		l.values = append(l.values, v)
 | |
| 	}
 | |
| 	l.begins = append(l.begins, uint32(len(l.values)))
 | |
| 
 | |
| 	id := int32(len(l.begins)) - 2
 | |
| 	l.idSet[hashSet(ids)] = id
 | |
| 
 | |
| 	return id
 | |
| }
 | |
| 
 | |
| // sequence returns the original sequence of values for the given ID.
 | |
| func (l *sequenceLexicon) sequence(id int32) []int32 {
 | |
| 	return l.values[l.begins[id]:l.begins[id+1]]
 | |
| }
 | |
| 
 | |
| // size reports the number of value sequences in the lexicon.
 | |
| func (l *sequenceLexicon) size() int {
 | |
| 	// Subtract one because the list of begins starts out with the first element set to 0.
 | |
| 	return len(l.begins) - 1
 | |
| }
 | |
| 
 | |
// hashSet returns a 32-bit hash of the given sequence of int32s: the Adler-32
// checksum of the values encoded back to back as 4-byte little-endian words.
func hashSet(s []int32) uint32 {
	// TODO(roberts): We just need a way to nicely hash all the values down to
	// a 32-bit value. To ensure no unnecessary dependencies we use the core
	// library types available to do this. Is there a better option?
	//
	// The values are encoded by hand instead of through binary.Write so that
	// there is no error return to discard.
	buf := make([]byte, 4*len(s))
	for i, v := range s {
		binary.LittleEndian.PutUint32(buf[4*i:], uint32(v))
	}
	return adler32.Checksum(buf)
}
 | |
| 
 | |
// uniqueInt32s returns a new slice holding the distinct values of in, sorted
// in ascending order. The input slice is left untouched, and an empty input
// yields a nil result.
func uniqueInt32s(in []int32) []int32 {
	if len(in) == 0 {
		return nil
	}

	// Work on a private copy so the caller's slice is never reordered.
	sorted := make([]int32, len(in))
	copy(sorted, in)
	sort.Slice(sorted, func(a, b int) bool { return sorted[a] < sorted[b] })

	// Once sorted, equal values sit next to each other; keep the first of
	// each run, packing the survivors towards the front.
	kept := 1
	for _, v := range sorted[1:] {
		if v == sorted[kept-1] {
			continue
		}
		sorted[kept] = v
		kept++
	}
	return sorted[:kept]
}
 |