[bugfix] Update exif-terminator (fix png issue) (#2391)

* [bugfix] Update exif-terminator (fix png issue)

* bump exif terminator

* fix tests
This commit is contained in:
tobi 2023-11-30 10:50:28 +01:00 committed by GitHub
commit 0108463e7b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 752 additions and 830 deletions

View file

@ -1,47 +0,0 @@
/*
exif-terminator
Copyright (C) 2022 SuperSeriousBusiness admin@gotosocial.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package terminator
import "fmt"
// logger is the package-wide destination for error reports. It starts
// out as the default stdout-printing logger and can be replaced with
// SetErrorLogger.
var logger ErrorLogger = &defaultErrorLogger{}
// ErrorLogger denotes a generic error logging function.
//
// Any type with a matching variadic Error method satisfies it; the
// package-level logger variable holds a value of this type.
type ErrorLogger interface {
// Error reports the given values as a single error message.
Error(args ...interface{})
}
// defaultErrorLogger is the stateless ErrorLogger installed when the
// package is initialised.
type defaultErrorLogger struct{}

// Error prints args to stdout, formatted the way fmt.Println formats
// them: operands separated by spaces, followed by a newline.
func (*defaultErrorLogger) Error(args ...interface{}) {
	// The byte count and write error from Println are intentionally
	// dropped: there is nowhere else to report a failed error report.
	_, _ = fmt.Println(args...)
}
// SetErrorLogger allows a user of the exif-terminator library
// to set the logger that will be used for error logging.
//
// If it is not set, the default error logger will be used, which
// just prints errors to stdout.
//
// A nil errorLogger is ignored and the logger currently in use is
// kept. Storing nil would make the next logger.Error call panic on
// a nil interface value.
//
// The assignment is not synchronised, so call this before starting
// any work that may log.
func SetErrorLogger(errorLogger ErrorLogger) {
	if errorLogger == nil {
		return
	}
	logger = errorLogger
}

View file

@ -19,10 +19,9 @@
package terminator
import (
"encoding/binary"
"io"
pngstructure "github.com/dsoprea/go-png-image-structure/v2"
pngstructure "github.com/superseriousbusiness/go-png-image-structure/v2"
)
type pngVisitor struct {
@ -45,49 +44,50 @@ func (v *pngVisitor) split(data []byte, atEOF bool) (int, []byte, error) {
}
}
// check if the splitter has any new chunks in it that we haven't written yet
chunkSlice := v.ps.Chunks()
// Check if the splitter now has
// any new chunks in it for us.
chunkSlice, err := v.ps.Chunks()
if err != nil {
return advance, token, err
}
// Write each chunk by passing it
// through our custom write func,
// which strips out exif and fixes
// the CRC of each chunk.
chunks := chunkSlice.Chunks()
for i, chunk := range chunks {
// look through all the chunks in the splitter
if i > v.lastWrittenChunk {
// we've got a chunk we haven't written yet! write it...
if err := v.writeChunk(chunk); err != nil {
return advance, token, err
}
// then remove the data
chunk.Data = chunk.Data[:0]
// and update
v.lastWrittenChunk = i
if i <= v.lastWrittenChunk {
// Skip already
// written chunks.
continue
}
// Write this new chunk.
if err := v.writeChunk(chunk); err != nil {
return advance, token, err
}
v.lastWrittenChunk = i
// Zero data; here you
// go garbage collector.
chunk.Data = nil
}
return advance, token, err
}
func (v *pngVisitor) writeChunk(chunk *pngstructure.Chunk) error {
if err := binary.Write(v.writer, binary.BigEndian, chunk.Length); err != nil {
return err
}
if _, err := v.writer.Write([]byte(chunk.Type)); err != nil {
return err
}
if chunk.Type == pngstructure.EXifChunkType {
blank := make([]byte, len(chunk.Data))
if _, err := v.writer.Write(blank); err != nil {
return err
}
} else {
if _, err := v.writer.Write(chunk.Data); err != nil {
return err
}
// Replace exif data
// with zero bytes.
clear(chunk.Data)
}
if err := binary.Write(v.writer, binary.BigEndian, chunk.Crc); err != nil {
return err
}
// Fix CRC of each chunk.
chunk.UpdateCrc32()
return nil
// finally, write chunk to writer.
_, err := chunk.WriteTo(v.writer)
return err
}

View file

@ -25,29 +25,34 @@ import (
"fmt"
"io"
pngstructure "github.com/dsoprea/go-png-image-structure/v2"
jpegstructure "github.com/superseriousbusiness/go-jpeg-image-structure/v2"
pngstructure "github.com/superseriousbusiness/go-png-image-structure/v2"
)
func Terminate(in io.Reader, fileSize int, mediaType string) (io.Reader, error) {
// to avoid keeping too much stuff in memory we want to pipe data directly
// To avoid keeping too much stuff
// in memory we want to pipe data
// directly to the reader.
pipeReader, pipeWriter := io.Pipe()
// we don't know ahead of time how long segments might be: they could be as large as
// the file itself, so unfortunately we need to allocate a buffer here that's as large
// as the file
// We don't know ahead of time how long
// segments might be: they could be as
// large as the file itself, so we need
// a buffer with generous overhead.
scanner := bufio.NewScanner(in)
scanner.Buffer([]byte{}, fileSize)
var err error
var err error
switch mediaType {
case "image/jpeg", "jpeg", "jpg":
err = terminateJpeg(scanner, pipeWriter, fileSize)
case "image/webp", "webp":
err = terminateWebp(scanner, pipeWriter)
case "image/png", "png":
// for pngs we need to skip the header bytes, so read them in
// and check we're really dealing with a png here
// For pngs we need to skip the header bytes, so read
// them in and check we're really dealing with a png.
header := make([]byte, len(pngstructure.PngSignature))
if _, headerError := in.Read(header); headerError != nil {
err = headerError
@ -67,68 +72,87 @@ func Terminate(in io.Reader, fileSize int, mediaType string) (io.Reader, error)
return pipeReader, err
}
func terminateJpeg(scanner *bufio.Scanner, writer io.WriteCloser, expectedFileSize int) error {
// jpeg visitor is where the spicy hack of streaming the de-exifed data is contained
func terminateJpeg(scanner *bufio.Scanner, writer *io.PipeWriter, expectedFileSize int) error {
v := &jpegVisitor{
writer: writer,
expectedFileSize: expectedFileSize,
}
// provide the visitor to the splitter so that it triggers on every section scan
// Provide the visitor to the splitter so
// that it triggers on every section scan.
js := jpegstructure.NewJpegSplitter(v)
// the visitor also needs to read back the list of segments: for this it needs
// to know what jpeg splitter it's attached to, so give it a pointer to the splitter
// The visitor also needs to read back the
// list of segments: for this it needs to
// know what jpeg splitter it's attached to,
// so give it a pointer to the splitter.
v.js = js
// use the jpeg splitters 'split' function, which satisfies the bufio.SplitFunc interface
// Jpeg splitter's 'Split' function
// satisfies bufio.SplitFunc.
scanner.Split(js.Split)
scanAndClose(scanner, writer)
go scanAndClose(scanner, writer)
return nil
}
func terminateWebp(scanner *bufio.Scanner, writer io.WriteCloser) error {
func terminateWebp(scanner *bufio.Scanner, writer *io.PipeWriter) error {
v := &webpVisitor{
writer: writer,
}
// use the webp visitor's 'split' function, which satisfies the bufio.SplitFunc interface
// Webp visitor's 'split' function
// satisfies bufio.SplitFunc{}.
scanner.Split(v.split)
scanAndClose(scanner, writer)
go scanAndClose(scanner, writer)
return nil
}
func terminatePng(scanner *bufio.Scanner, writer io.WriteCloser) error {
func terminatePng(scanner *bufio.Scanner, writer *io.PipeWriter) error {
ps := pngstructure.NewPngSplitter()
// Don't bother checking CRC;
// we're overwriting it anyway.
ps.DoCheckCrc(false)
v := &pngVisitor{
ps: ps,
writer: writer,
lastWrittenChunk: -1,
}
// use the png visitor's 'split' function, which satisfies the bufio.SplitFunc interface
// Png visitor's 'split' function
// satisfies bufio.SplitFunc{}.
scanner.Split(v.split)
scanAndClose(scanner, writer)
go scanAndClose(scanner, writer)
return nil
}
func scanAndClose(scanner *bufio.Scanner, writer io.WriteCloser) {
// scan asynchronously until there's nothing left to scan, and then close the writer
// so that the reader on the other side knows that we're done
//
// due to the nature of io.Pipe, writing won't actually work
// until the pipeReader starts being read by the caller, which
// is why we do this asynchronously
go func() {
defer writer.Close()
for scanner.Scan() {
}
if scanner.Err() != nil {
logger.Error(scanner.Err())
}
// scanAndClose scans through the given scanner until there's
// nothing left to scan, and then closes the writer so that the
// reader on the other side of the pipe knows that we're done.
//
// Any error encountered when scanning is handed to the reader side
// of the pipe via CloseWithError instead of being logged.
//
// Due to the nature of io.Pipe, writing won't actually work
// until the pipeReader starts being read by the caller, which
// is why this function should always be called asynchronously.
func scanAndClose(scanner *bufio.Scanner, writer *io.PipeWriter) {
var err error
defer func() {
// Always close writer, using returned
// scanner error (if any). If err is nil
// then the standard io.EOF will be used.
// (this will not overwrite existing).
writer.CloseWithError(err)
}()
for scanner.Scan() {
}
// Set error on return.
err = scanner.Err()
}