Unmarshal log

This commit is contained in:
Dan Jones 2024-02-03 18:52:12 -06:00
commit 85abc8cb34
2 changed files with 180 additions and 0 deletions

View file

@ -1 +1,69 @@
package models package models
import (
"bufio"
"bytes"
"regexp"
"sync"
)
// reg matches a single log entry. With (?s) the dot also matches newlines and
// with (?m) ^ matches at the start of every line, so a match begins at an
// "@begin " that starts a line and lazily extends to the first "@end" that
// either starts a line or is preceded by a space.
var reg = regexp.MustCompile("(?sm)^@begin .+?(^| )@end")
// Log is a named collection of entries. Entries is filled in by
// UnmarshalText.
type Log struct {
// Name identifies the log; UnmarshalText does not touch it.
Name string
// Entries holds the entries decoded from the log text.
Entries []Entry
}
// UnmarshalText decodes in as log text and appends every entry delivered by
// getLogUnarshalChan to l.Entries. Existing entries are kept. The returned
// error is always nil.
func (l *Log) UnmarshalText(in []byte) error {
	entries := l.getLogUnarshalChan(in)
	for {
		entry, open := <-entries
		if !open {
			// Channel closed: every entry has been delivered.
			return nil
		}
		l.Entries = append(l.Entries, entry)
	}
}
// scanLog is a bufio.SplitFunc that yields one token per log entry, where an
// entry is the first match of the package-level pattern reg in data.
//
// When an entry is found it is returned as the token and everything up to the
// end of the match is consumed, which also drops any text preceding the entry.
// When no entry is found, more input is requested unless atEOF is set, in
// which case the remaining bytes are discarded. It never returns an error.
func scanLog(data []byte, atEOF bool) (advance int, token []byte, err error) {
	loc := reg.FindIndex(data)
	switch {
	case loc != nil:
		// Found an entry: emit it and skip past its end.
		return loc[1], data[loc[0]:loc[1]], nil
	case atEOF:
		// Nothing left that can become an entry; throw the rest away.
		// For empty data this is (0, nil, nil), which ends the scan.
		return len(data), nil, nil
	default:
		// No complete entry yet; ask the scanner for more data.
		return 0, nil, nil
	}
}
// getLogUnarshalChan splits in into entries with scanLog, decodes each entry
// in its own goroutine via Entry.UnmarshalText, and returns a channel that
// delivers every entry that decoded without error. Entries that fail to
// decode are dropped silently. The channel is closed once all workers have
// finished.
//
// Workers send as they finish, so the order of entries on the channel depends
// on goroutine scheduling and is not guaranteed to match the input order.
func (l *Log) getLogUnarshalChan(in []byte) chan Entry {
	// Rough estimation. Every token matched by reg is at least 12 bytes
	// long, so this capacity is never smaller than the number of entries and
	// workers never block on send.
	size := len(in) / 10
	ch := make(chan Entry, size)
	var wg sync.WaitGroup

	scan := bufio.NewScanner(bytes.NewReader(in))
	// The default scanner gives up (ErrTooLong) once a token, or a run of
	// unmatched text, exceeds 64 KiB, which would silently drop the rest of
	// the log. Allow the buffer to hold the whole input instead.
	scan.Buffer(nil, len(in)+1)
	scan.Split(scanLog)

	for scan.Scan() {
		// scan.Bytes() points into the scanner's internal buffer, which a
		// later Scan call may overwrite. The worker runs concurrently with
		// the scan loop, so it must get its own copy.
		field := append([]byte(nil), scan.Bytes()...)

		wg.Add(1)
		go func(field []byte) {
			defer wg.Done()
			entry := new(Entry)
			if err := entry.UnmarshalText(field); err != nil {
				// Best effort: malformed entries are skipped.
				return
			}
			ch <- *entry
		}(field)
	}

	// Close the channel only after every worker is done sending.
	go func() {
		wg.Wait()
		close(ch)
	}()
	return ch
}

112
models/log_test.go Normal file
View file

@ -0,0 +1,112 @@
package models
import (
"encoding"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Compile-time check that *Log implements encoding.TextUnmarshaler.
var _ encoding.TextUnmarshaler = new(Log)

// Fixture entries used by the tests below.

// first is a one-line entry with no fields.
const first = "@begin January 01, 2020 at 01:02:03AM -0000 - This is simple @end\n"

// second has a single field, with @end on the same line as the field.
const second = `@begin January 01, 2020 at 01:02:05AM -0000 - We have one thing here
@foo bar @end
`

// third has two fields, with @end on a line of its own.
const third = `@begin January 01, 2020 at 01:02:07AM -0000 - We have two things here
@num 42
@newline true
@end
`

// fourth uses an ISO-8601 timestamp with a "Z" suffix.
const fourth = `@begin 2020-01-01T01:02:09Z - ISO-8601 date
@end
`

// skip is a line that sits outside any @begin/@end pair.
const skip = "@ignoreme true\n"

// fifth uses an ISO-8601 timestamp with a numeric UTC offset.
const fifth = `@begin 2020-01-01T01:02:11+00:00 - ISO-8601 other date
@with-timezone yes @end
`

// badEntry is delimited like an entry; TestLogUnmarshalBad expects it to
// produce no entries.
const badEntry = "@begin bad date no title @end\n"

// all concatenates the five valid entries with skip between fourth and fifth.
const all = first + second + third + fourth + skip + fifth
// TestLogUnmarshalBig decodes the combined fixture and checks that all five
// entries are found, that the simple entry has no fields, and that the stray
// "@ignoreme" line was not attached to any entry.
func TestLogUnmarshalBig(t *testing.T) {
	parsed := &Log{Name: "test-log"}
	require.NoError(t, parsed.UnmarshalText([]byte(all)))
	require.Len(t, parsed.Entries, 5)

	simple, ok := findEntry(t, parsed, "This is simple", true)
	if !ok {
		return
	}
	assert.Len(t, simple.Fields, 0)

	for i := range parsed.Entries {
		findMeta(t, parsed.Entries[i], "ignoreme", true, false)
	}
}
func TestLogUnmarshalIgnoreGarbage(t *testing.T) {
l := &Log{Name: "test-log"}
in := "ignore this\n" + second + "some crap also skip -> " + third + skip
err := l.UnmarshalText([]byte(in))
require.NoError(t, err)
require.Len(t, l.Entries, 1)
en := l.Entries[0]
assert.Equal(t, "We have one thing here", en.Title)
assert.Len(t, en.Fields, 1)
assert.Equal(t, "foo", en.Fields[0].Key)
assert.Equal(t, "bar", en.Fields[0].Value)
}
// TestLogUnmarshalEmpty checks that empty input decodes without error and
// yields no entries.
func TestLogUnmarshalEmpty(t *testing.T) {
	parsed := &Log{Name: "test-log"}
	require.NoError(t, parsed.UnmarshalText([]byte{}))
	require.Len(t, parsed.Entries, 0)
}
// TestLogUnmarshalBad checks that an entry which cannot be decoded is
// dropped without making UnmarshalText fail.
func TestLogUnmarshalBad(t *testing.T) {
	parsed := &Log{Name: "test-log"}
	require.NoError(t, parsed.UnmarshalText([]byte(badEntry)))
	require.Len(t, parsed.Entries, 0)
}
// findEntry looks for an entry in log whose Title equals title and asserts
// that it is present (shouldFind true) or absent (shouldFind false).
//
// It returns the last matching entry, or the zero Entry if there is none,
// together with whether the assertion passed. Note that the boolean reports
// the assertion result, not whether an entry was found.
func findEntry(t *testing.T, log *Log, title string, shouldFind bool) (Entry, bool) {
	// Report assertion failures at the caller's line, not inside this helper.
	t.Helper()

	var ret Entry
	found := false
	for _, e := range log.Entries {
		if e.Title == title {
			ret = e
			found = true
		}
	}

	if shouldFind {
		return ret, assert.Truef(t, found, "Unable to find entry %s", title)
	}
	return ret, assert.Falsef(t, found, "Entry %s should not have been found but was", title)
}
// findMeta looks for a field of entry whose Key equals key and whose Value
// equals value, and asserts that it is present (shouldFind true) or absent
// (shouldFind false).
//
// It returns the last matching Meta, or the zero Meta if there is none,
// together with whether the assertion passed. Note that the boolean reports
// the assertion result, not whether a field was found.
func findMeta(t *testing.T, entry Entry, key string, value any, shouldFind bool) (Meta, bool) {
	// Report assertion failures at the caller's line, not inside this helper.
	t.Helper()

	var ret Meta
	found := false
	for _, m := range entry.Fields {
		if m.Key == key && m.Value == value {
			ret = m
			found = true
		}
	}

	if shouldFind {
		return ret, assert.Truef(t, found, "Unable to find meta %s", key)
	}
	return ret, assert.Falsef(t, found, "Meta %s should not have been found but was", key)
}