[feature] support processing of (many) more media types (#3090)

* initial work replacing our media decoding / encoding pipeline with ffprobe + ffmpeg
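  A rough sketch of the probing side of that pipeline, assuming ffprobe is invoked as an external command with JSON output (the PR itself runs ffprobe/ffmpeg embedded via go-ffmpreg, so the real wiring and result types differ):

  ```go
  // Illustrative only: the real pipeline runs ffprobe via go-ffmpreg rather
  // than a subprocess, and its result types differ.
  package main

  import (
  	"context"
  	"encoding/json"
  	"fmt"
  	"os/exec"
  )

  // ffprobeResult holds the handful of fields this sketch cares about.
  type ffprobeResult struct {
  	Streams []struct {
  		CodecType string `json:"codec_type"` // "video", "audio", ...
  		CodecName string `json:"codec_name"`
  		Width     int    `json:"width"`
  		Height    int    `json:"height"`
  	} `json:"streams"`
  	Format struct {
  		FormatName string `json:"format_name"`
  		Duration   string `json:"duration"`
  	} `json:"format"`
  }

  // probeFile asks ffprobe for JSON-formatted container and stream info.
  func probeFile(ctx context.Context, path string) (*ffprobeResult, error) {
  	cmd := exec.CommandContext(ctx, "ffprobe",
  		"-v", "error",
  		"-print_format", "json",
  		"-show_format",
  		"-show_streams",
  		path,
  	)
  	out, err := cmd.Output()
  	if err != nil {
  		return nil, fmt.Errorf("ffprobe: %w", err)
  	}
  	res := new(ffprobeResult)
  	if err := json.Unmarshal(out, res); err != nil {
  		return nil, fmt.Errorf("parsing ffprobe output: %w", err)
  	}
  	return res, nil
  }

  func main() {
  	res, err := probeFile(context.Background(), "testdata/example.mp4")
  	if err != nil {
  		panic(err)
  	}
  	fmt.Printf("container=%s streams=%d\n", res.Format.FormatName, len(res.Streams))
  }
  ```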

* specify the video codec to use when generating a static image from an emoji
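  For illustration, generating the static image boils down to asking ffmpeg for a single frame with an explicitly named video codec; a hedged sketch (flag choices here are assumptions, not necessarily what the PR uses):

  ```go
  // Illustrative sketch; codec and flag choices are assumptions.
  package main

  import (
  	"context"
  	"os/exec"
  )

  // extractStaticFrame writes the first frame of an animated emoji to a PNG,
  // naming the video codec explicitly rather than letting ffmpeg infer it.
  func extractStaticFrame(ctx context.Context, in, out string) error {
  	cmd := exec.CommandContext(ctx, "ffmpeg",
  		"-y",              // overwrite output if it exists
  		"-i", in,          // animated source (gif, apng, webp, ...)
  		"-codec:v", "png", // codec for the static image
  		"-frames:v", "1",  // only the first video frame
  		out,
  	)
  	return cmd.Run()
  }

  func main() {
  	if err := extractStaticFrame(context.Background(), "emoji.gif", "emoji_static.png"); err != nil {
  		panic(err)
  	}
  }
  ```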

* update go-storage library (fixes incompatibility after updating go-iotools)

* maintain the image's aspect ratio when generating a thumbnail
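  Preserving the aspect ratio just means scaling both dimensions by the same factor and only ever scaling down; a minimal sketch of that calculation (the 512px cap is an assumed example value):

  ```go
  // Minimal aspect-ratio-preserving thumbnail sizing; 512 is an assumed cap.
  package main

  import "fmt"

  // thumbSize scales (w, h) down so the longer edge fits within max while
  // keeping the aspect ratio; images already small enough are left alone.
  func thumbSize(w, h, max int) (int, int) {
  	if w <= max && h <= max {
  		return w, h
  	}
  	if w >= h {
  		return max, h * max / w
  	}
  	return w * max / h, max
  }

  func main() {
  	fmt.Println(thumbSize(1920, 1080, 512)) // 512 288
  	fmt.Println(thumbSize(1080, 1920, 512)) // 288 512
  	fmt.Println(thumbSize(400, 300, 512))   // 400 300 (unchanged)
  }
  ```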

* update readme to show go-ffmpreg

* fix a bunch of media tests, move filesize checking to callers of the media manager for more flexibility
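  With size checks moved to the callers, each call site can enforce whichever limit applies to it (e.g. local vs. remote). One straightforward way to do that is to wrap the input reader so the copy fails once the limit is crossed; a sketch with assumed names and an assumed limit:

  ```go
  // Caller-side size checking; names and the example limit are assumptions.
  package main

  import (
  	"bytes"
  	"fmt"
  	"io"
  	"strings"
  )

  // maxSizeReader yields at most max bytes and errors out as soon as the
  // underlying stream is found to hold more than that.
  type maxSizeReader struct {
  	r    io.Reader
  	max  int64
  	read int64
  }

  func newMaxSizeReader(r io.Reader, max int64) io.Reader {
  	// Read one byte past the limit so "exactly max" can be told apart from "too big".
  	return &maxSizeReader{r: io.LimitReader(r, max+1), max: max}
  }

  func (m *maxSizeReader) Read(p []byte) (int, error) {
  	n, err := m.r.Read(p)
  	m.read += int64(n)
  	if m.read > m.max {
  		return n, fmt.Errorf("media exceeds configured max size of %d bytes", m.max)
  	}
  	return n, err
  }

  func main() {
  	src := strings.NewReader("0123456789") // pretend this is an upload
  	var buf bytes.Buffer
  	if _, err := io.Copy(&buf, newMaxSizeReader(src, 8)); err != nil {
  		fmt.Println("rejected:", err)
  	}
  }
  ```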

* remove extra debug from error message

* fix up incorrect function signatures

* update PutFile to just use a regular file copy, as chances are the file is on a separate partition

* fix remaining tests, remove some unneeded tests now that we're working with ffmpeg/ffprobe

* update more tests, add more code comments

* add utilities to generate processed emoji / media outputs

* fix remaining tests

* add test for opus media file, add license header to utility cmds

* limit the number of concurrently available ffmpeg / ffprobe instances
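  A buffered-channel semaphore is the usual way to cap concurrent instances in Go; a sketch of the idea, with the limit value being an assumption:

  ```go
  // A buffered-channel semaphore; the limit of 1 below is an assumed value.
  package main

  import (
  	"context"
  	"fmt"
  )

  // pool hands out at most cap(slots) concurrent instances.
  type pool struct {
  	slots chan struct{}
  }

  func newPool(n int) *pool {
  	return &pool{slots: make(chan struct{}, n)}
  }

  // do runs fn once a slot is free, or gives up when ctx is cancelled.
  func (p *pool) do(ctx context.Context, fn func() error) error {
  	select {
  	case p.slots <- struct{}{}:
  		defer func() { <-p.slots }()
  		return fn()
  	case <-ctx.Done():
  		return ctx.Err()
  	}
  }

  func main() {
  	ffmpegPool := newPool(1)
  	err := ffmpegPool.do(context.Background(), func() error {
  		fmt.Println("running one ffmpeg invocation")
  		return nil
  	})
  	if err != nil {
  		panic(err)
  	}
  }
  ```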

* reduce number of instances

* further reduce number of instances

* fix envparsing test with configuration variables

* update docs and configuration with new media-{local,remote}-max-size variables
kim 2024-07-12 09:39:47 +00:00 committed by GitHub
commit cde2fb6244
376 changed files with 8026 additions and 54091 deletions


@@ -1,9 +0,0 @@
MIT LICENSE
Copyright 2020 Dustin Oprea
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


@@ -1,81 +0,0 @@
package pngstructure
import (
"bytes"
"fmt"
"encoding/binary"
)
type ChunkDecoder struct {
}
func NewChunkDecoder() *ChunkDecoder {
return new(ChunkDecoder)
}
func (cd *ChunkDecoder) Decode(c *Chunk) (decoded interface{}, err error) {
switch c.Type {
case "IHDR":
return cd.decodeIHDR(c)
}
// We don't decode this type.
return nil, nil
}
type ChunkIHDR struct {
Width uint32
Height uint32
BitDepth uint8
ColorType uint8
CompressionMethod uint8
FilterMethod uint8
InterlaceMethod uint8
}
func (ihdr *ChunkIHDR) String() string {
return fmt.Sprintf("IHDR<WIDTH=(%d) HEIGHT=(%d) DEPTH=(%d) COLOR-TYPE=(%d) COMP-METHOD=(%d) FILTER-METHOD=(%d) INTRLC-METHOD=(%d)>",
ihdr.Width, ihdr.Height, ihdr.BitDepth, ihdr.ColorType, ihdr.CompressionMethod, ihdr.FilterMethod, ihdr.InterlaceMethod,
)
}
func (cd *ChunkDecoder) decodeIHDR(c *Chunk) (*ChunkIHDR, error) {
var (
b = bytes.NewBuffer(c.Data)
ihdr = new(ChunkIHDR)
readf = func(data interface{}) error {
return binary.Read(b, binary.BigEndian, data)
}
)
if err := readf(&ihdr.Width); err != nil {
return nil, err
}
if err := readf(&ihdr.Height); err != nil {
return nil, err
}
if err := readf(&ihdr.BitDepth); err != nil {
return nil, err
}
if err := readf(&ihdr.ColorType); err != nil {
return nil, err
}
if err := readf(&ihdr.CompressionMethod); err != nil {
return nil, err
}
if err := readf(&ihdr.FilterMethod); err != nil {
return nil, err
}
if err := readf(&ihdr.InterlaceMethod); err != nil {
return nil, err
}
return ihdr, nil
}
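
For context on what's being deleted: the removed decoder was typically driven by parsing a file into chunks and then decoding the leading IHDR chunk. An illustrative sketch against the upstream module (import path and file name are assumptions):

```go
// Illustrative usage of the removed pngstructure helpers; the import path
// is assumed to be the upstream module, and the PNG path is a placeholder.
package main

import (
	"fmt"

	"github.com/dsoprea/go-png-image-structure/v2/pngstructure"
)

func main() {
	pmp := pngstructure.NewPngMediaParser()

	mc, err := pmp.ParseFile("testdata/example.png")
	if err != nil {
		panic(err)
	}
	cs := mc.(*pngstructure.ChunkSlice)

	// The first chunk is always IHDR; decode it for the image dimensions.
	decoded, err := pngstructure.NewChunkDecoder().Decode(cs.Chunks()[0])
	if err != nil {
		panic(err)
	}
	fmt.Println(decoded) // e.g. IHDR<WIDTH=(640) HEIGHT=(480) ...>
}
```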


@@ -1,85 +0,0 @@
package pngstructure
import (
"bufio"
"bytes"
"image"
"io"
"os"
"image/png"
riimage "github.com/dsoprea/go-utility/v2/image"
)
// PngMediaParser knows how to parse a PNG stream.
type PngMediaParser struct {
}
// NewPngMediaParser returns a new `PngMediaParser`.
func NewPngMediaParser() riimage.MediaParser {
return new(PngMediaParser)
}
// Parse parses a PNG stream given a `io.ReadSeeker`.
func (pmp *PngMediaParser) Parse(
rs io.ReadSeeker,
size int,
) (riimage.MediaContext, error) {
ps := NewPngSplitter()
if err := ps.readHeader(rs); err != nil {
return nil, err
}
s := bufio.NewScanner(rs)
// Since each segment can be any
// size, our buffer must be allowed
// to grow as large as the file.
buffer := []byte{}
s.Buffer(buffer, size)
s.Split(ps.Split)
for s.Scan() {
}
if err := s.Err(); err != nil {
return nil, err
}
return ps.Chunks()
}
// ParseFile parses a PNG stream given a file-path.
func (pmp *PngMediaParser) ParseFile(filepath string) (riimage.MediaContext, error) {
f, err := os.Open(filepath)
if err != nil {
return nil, err
}
defer f.Close()
stat, err := f.Stat()
if err != nil {
return nil, err
}
size := stat.Size()
return pmp.Parse(f, int(size))
}
// ParseBytes parses a PNG stream given a byte-slice.
func (pmp *PngMediaParser) ParseBytes(data []byte) (riimage.MediaContext, error) {
br := bytes.NewReader(data)
return pmp.Parse(br, len(data))
}
// LooksLikeFormat returns a boolean indicating
// whether the stream looks like a PNG image.
func (pmp *PngMediaParser) LooksLikeFormat(data []byte) bool {
return len(data) >= len(PngSignature) && bytes.Equal(data[:len(PngSignature)], PngSignature[:])
}
// GetImage returns an image.Image-compatible struct.
func (pmp *PngMediaParser) GetImage(r io.Reader) (img image.Image, err error) {
return png.Decode(r)
}


@@ -1,386 +0,0 @@
package pngstructure
import (
"bytes"
"errors"
"fmt"
"io"
"encoding/binary"
"hash/crc32"
"github.com/dsoprea/go-exif/v3"
exifcommon "github.com/dsoprea/go-exif/v3/common"
riimage "github.com/dsoprea/go-utility/v2/image"
)
var (
PngSignature = [8]byte{137, 'P', 'N', 'G', '\r', '\n', 26, '\n'}
EXifChunkType = "eXIf"
IHDRChunkType = "IHDR"
)
var (
ErrNotPng = errors.New("not png data")
ErrCrcFailure = errors.New("crc failure")
)
// ChunkSlice encapsulates a slice of chunks.
type ChunkSlice struct {
chunks []*Chunk
}
func NewChunkSlice(chunks []*Chunk) (*ChunkSlice, error) {
if len(chunks) == 0 {
err := errors.New("ChunkSlice must be initialized with at least one chunk (IHDR)")
return nil, err
} else if chunks[0].Type != IHDRChunkType {
err := errors.New("first chunk in any ChunkSlice must be an IHDR")
return nil, err
}
return &ChunkSlice{chunks}, nil
}
func NewPngChunkSlice() (*ChunkSlice, error) {
ihdrChunk := &Chunk{
Type: IHDRChunkType,
}
ihdrChunk.UpdateCrc32()
return NewChunkSlice([]*Chunk{ihdrChunk})
}
func (cs *ChunkSlice) String() string {
return fmt.Sprintf("ChunkSlize<LEN=(%d)>", len(cs.chunks))
}
// Chunks exposes the actual slice.
func (cs *ChunkSlice) Chunks() []*Chunk {
return cs.chunks
}
// WriteTo writes the PNG signature followed by every encoded chunk to w.
func (cs *ChunkSlice) WriteTo(w io.Writer) error {
if _, err := w.Write(PngSignature[:]); err != nil {
return err
}
// TODO(dustin): !! This should respect
// the safe-to-copy characteristic.
for _, c := range cs.chunks {
if _, err := c.WriteTo(w); err != nil {
return err
}
}
return nil
}
// Index returns a map of chunk types to chunk slices, grouping all like chunks.
func (cs *ChunkSlice) Index() (index map[string][]*Chunk) {
index = make(map[string][]*Chunk)
for _, c := range cs.chunks {
if grouped, found := index[c.Type]; found {
index[c.Type] = append(grouped, c)
} else {
index[c.Type] = []*Chunk{c}
}
}
return index
}
// FindExif returns the chunk that hosts the EXIF data.
func (cs *ChunkSlice) FindExif() (chunk *Chunk, err error) {
index := cs.Index()
if chunks, found := index[EXifChunkType]; found {
return chunks[0], nil
}
return nil, exif.ErrNoExif
}
// Exif returns an `exif.Ifd` instance with the existing tags.
func (cs *ChunkSlice) Exif() (*exif.Ifd, []byte, error) {
chunk, err := cs.FindExif()
if err != nil {
return nil, nil, err
}
im, err := exifcommon.NewIfdMappingWithStandard()
if err != nil {
return nil, nil, err
}
ti := exif.NewTagIndex()
_, index, err := exif.Collect(im, ti, chunk.Data)
if err != nil {
return nil, nil, err
}
return index.RootIfd, chunk.Data, nil
}
// ConstructExifBuilder returns an `exif.IfdBuilder` instance
// (needed for modifying) preloaded with all existing tags.
func (cs *ChunkSlice) ConstructExifBuilder() (*exif.IfdBuilder, error) {
rootIfd, _, err := cs.Exif()
if err != nil {
return nil, err
}
return exif.NewIfdBuilderFromExistingChain(rootIfd), nil
}
// SetExif encodes and sets EXIF data into this segment.
func (cs *ChunkSlice) SetExif(ib *exif.IfdBuilder) error {
// Encode.
ibe := exif.NewIfdByteEncoder()
exifData, err := ibe.EncodeToExif(ib)
if err != nil {
return err
}
// Set.
exifChunk, err := cs.FindExif()
switch {
case err == nil:
// EXIF chunk already exists.
exifChunk.Data = exifData
exifChunk.Length = uint32(len(exifData))
case errors.Is(err, exif.ErrNoExif):
// Add an EXIF chunk for the first time.
exifChunk = &Chunk{
Type: EXifChunkType,
Data: exifData,
Length: uint32(len(exifData)),
}
// Insert exif after the IHDR chunk; it's
// a reliably appropriate place to put it.
cs.chunks = append(
cs.chunks[:1],
append(
[]*Chunk{exifChunk},
cs.chunks[1:]...,
)...,
)
default:
return err
}
exifChunk.UpdateCrc32()
return nil
}
// PngSplitter hosts the principal `Split()`
// method used by `bufio.Scanner`.
type PngSplitter struct {
chunks []*Chunk
currentOffset int
doCheckCrc bool
crcErrors []string
}
func (ps *PngSplitter) Chunks() (*ChunkSlice, error) {
return NewChunkSlice(ps.chunks)
}
func (ps *PngSplitter) DoCheckCrc(doCheck bool) {
ps.doCheckCrc = doCheck
}
func (ps *PngSplitter) CrcErrors() []string {
return ps.crcErrors
}
func NewPngSplitter() *PngSplitter {
return &PngSplitter{
chunks: make([]*Chunk, 0),
doCheckCrc: true,
crcErrors: make([]string, 0),
}
}
// Chunk describes a single chunk.
type Chunk struct {
Offset int
Length uint32
Type string
Data []byte
Crc uint32
}
func (c *Chunk) String() string {
return fmt.Sprintf("Chunk<OFFSET=(%d) LENGTH=(%d) TYPE=[%s] CRC=(%d)>", c.Offset, c.Length, c.Type, c.Crc)
}
func calculateCrc32(chunk *Chunk) uint32 {
c := crc32.NewIEEE()
c.Write([]byte(chunk.Type))
c.Write(chunk.Data)
return c.Sum32()
}
func (c *Chunk) UpdateCrc32() {
c.Crc = calculateCrc32(c)
}
func (c *Chunk) CheckCrc32() bool {
expected := calculateCrc32(c)
return c.Crc == expected
}
// Bytes encodes and returns the bytes for this chunk.
func (c *Chunk) Bytes() ([]byte, error) {
if len(c.Data) != int(c.Length) {
return nil, errors.New("length of data not correct")
}
b := make([]byte, 0, 4+4+c.Length+4)
b = binary.BigEndian.AppendUint32(b, c.Length)
b = append(b, c.Type...)
b = append(b, c.Data...)
b = binary.BigEndian.AppendUint32(b, c.Crc)
return b, nil
}
// WriteTo encodes and writes the bytes for this chunk.
func (c *Chunk) WriteTo(w io.Writer) (int, error) {
if len(c.Data) != int(c.Length) {
return 0, errors.New("length of data not correct")
}
var n int
b := make([]byte, 4) // uint32 buf
binary.BigEndian.PutUint32(b, c.Length)
if nn, err := w.Write(b); err != nil {
return n + nn, err
}
n += len(b)
if nn, err := io.WriteString(w, c.Type); err != nil {
return n + nn, err
}
n += len(c.Type)
if nn, err := w.Write(c.Data); err != nil {
return n + nn, err
}
n += len(c.Data)
binary.BigEndian.PutUint32(b, c.Crc)
if nn, err := w.Write(b); err != nil {
return n + nn, err
}
n += len(b)
return n, nil
}
// readHeader verifies that the PNG header bytes appear next.
func (ps *PngSplitter) readHeader(r io.Reader) error {
var (
sigLen = len(PngSignature)
header = make([]byte, sigLen)
)
if _, err := io.ReadFull(r, header); err != nil {
return err
}
ps.currentOffset += sigLen
if !bytes.Equal(header, PngSignature[:]) {
return ErrNotPng
}
return nil
}
// Split fulfills the `bufio.SplitFunc`
// function definition for `bufio.Scanner`.
func (ps *PngSplitter) Split(
data []byte,
atEOF bool,
) (
advance int,
token []byte,
err error,
) {
// We might have more than one chunk's worth of data and,
// if `atEOF` is true, we won't be called again. So we
// repeatedly read chunks until we run out of the data we
// were given, then return the number of bytes for the
// chunks we've already completely read. We'll then be
// called again from the end of those bytes, at which point
// we'll indicate that we don't yet have enough for another
// chunk and should be called with more.
for {
len_ := len(data)
if len_ < 8 {
return advance, nil, nil
}
length := binary.BigEndian.Uint32(data[:4])
type_ := string(data[4:8])
chunkSize := (8 + int(length) + 4)
if len_ < chunkSize {
return advance, nil, nil
}
crcIndex := 8 + length
crc := binary.BigEndian.Uint32(data[crcIndex : crcIndex+4])
content := make([]byte, length)
copy(content, data[8:8+length])
c := &Chunk{
Length: length,
Type: type_,
Data: content,
Crc: crc,
Offset: ps.currentOffset,
}
ps.chunks = append(ps.chunks, c)
if !c.CheckCrc32() {
ps.crcErrors = append(ps.crcErrors, type_)
if ps.doCheckCrc {
err = ErrCrcFailure
return
}
}
advance += chunkSize
ps.currentOffset += chunkSize
data = data[chunkSize:]
}
}
var (
// Enforce interface conformance.
_ riimage.MediaContext = new(ChunkSlice)
)
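
And the EXIF helpers above were used roughly like this: parse the PNG, read (or rebuild) its eXIf chunk, then write the chunk stream back out. Again an illustrative sketch, with the import path and file names assumed:

```go
// Illustrative usage of the removed EXIF plumbing; import path and
// file names are assumptions.
package main

import (
	"fmt"
	"os"

	"github.com/dsoprea/go-png-image-structure/v2/pngstructure"
)

func main() {
	pmp := pngstructure.NewPngMediaParser()

	mc, err := pmp.ParseFile("testdata/exif.png")
	if err != nil {
		panic(err)
	}
	cs := mc.(*pngstructure.ChunkSlice)

	// Read the root EXIF IFD, assuming the file carries an eXIf chunk.
	rootIfd, _, err := cs.Exif()
	if err != nil {
		panic(err)
	}
	fmt.Println(rootIfd)

	// Round-trip: rebuild the EXIF builder, reattach it, write the PNG out.
	ib, err := cs.ConstructExifBuilder()
	if err != nil {
		panic(err)
	}
	if err := cs.SetExif(ib); err != nil {
		panic(err)
	}

	out, err := os.Create("exif-roundtrip.png")
	if err != nil {
		panic(err)
	}
	defer out.Close()

	if err := cs.WriteTo(out); err != nil {
		panic(err)
	}
}
```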


@@ -1,77 +0,0 @@
package pngstructure
import (
"fmt"
"os"
"path"
)
var (
assetsPath = "assets"
)
func getModuleRootPath() (string, error) {
moduleRootPath := os.Getenv("PNG_MODULE_ROOT_PATH")
if moduleRootPath != "" {
return moduleRootPath, nil
}
currentWd, err := os.Getwd()
if err != nil {
return "", err
}
currentPath := currentWd
visited := make([]string, 0)
for {
tryStampFilepath := path.Join(currentPath, ".MODULE_ROOT")
_, err := os.Stat(tryStampFilepath)
if err != nil && !os.IsNotExist(err) {
return "", err
} else if err == nil {
break
}
visited = append(visited, tryStampFilepath)
currentPath = path.Dir(currentPath)
if currentPath == "/" {
return "", fmt.Errorf("could not find module-root: %v", visited)
}
}
return currentPath, nil
}
func getTestAssetsPath() (string, error) {
if assetsPath == "" {
moduleRootPath, err := getModuleRootPath()
if err != nil {
return "", err
}
assetsPath = path.Join(moduleRootPath, "assets")
}
return assetsPath, nil
}
func getTestBasicImageFilepath() (string, error) {
assetsPath, err := getTestAssetsPath()
if err != nil {
return "", err
}
return path.Join(assetsPath, "libpng.png"), nil
}
func getTestExifImageFilepath() (string, error) {
assetsPath, err := getTestAssetsPath()
if err != nil {
return "", err
}
return path.Join(assetsPath, "exif.png"), nil
}


@@ -1,67 +0,0 @@
package pngstructure
import (
"bytes"
"fmt"
)
func DumpBytes(data []byte) {
fmt.Printf("DUMP: ")
for _, x := range data {
fmt.Printf("%02x ", x)
}
fmt.Printf("\n")
}
func DumpBytesClause(data []byte) {
fmt.Printf("DUMP: ")
fmt.Printf("[]byte { ")
for i, x := range data {
fmt.Printf("0x%02x", x)
if i < len(data)-1 {
fmt.Printf(", ")
}
}
fmt.Printf(" }\n")
}
func DumpBytesToString(data []byte) (string, error) {
b := new(bytes.Buffer)
for i, x := range data {
if _, err := b.WriteString(fmt.Sprintf("%02x", x)); err != nil {
return "", err
}
if i < len(data)-1 {
if _, err := b.WriteRune(' '); err != nil {
return "", err
}
}
}
return b.String(), nil
}
func DumpBytesClauseToString(data []byte) (string, error) {
b := new(bytes.Buffer)
for i, x := range data {
if _, err := b.WriteString(fmt.Sprintf("0x%02x", x)); err != nil {
return "", err
}
if i < len(data)-1 {
if _, err := b.WriteString(", "); err != nil {
return "", err
}
}
}
return b.String(), nil
}