archive: refactor to use archive "items" instead of StringSet (MR 22)

This adds a couple of new types: archiveItem, and archiveItems, which is
effectively a set of archiveItem values. Items in the set are kept sorted.

fixes #10
Clayton Craft
2022-09-10 00:29:52 -07:00
parent d78c6d5a62
commit 568fe7f717
3 changed files with 318 additions and 75 deletions
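
For context, a minimal sketch of how callers use the reworked API after this change: directories and files both go through AddItem, and the Dirs/Files string sets are no longer exported. The New, AddItem, and Write signatures are taken from the diff below; the import path is assumed from the pkgs/misc path seen in the imports and may differ.

package main

import (
	"log"

	// import path assumed from the pkgs/misc path in the diff
	"gitlab.com/postmarketOS/postmarketos-mkinitfs/pkgs/archive"
)

func main() {
	a, err := archive.New()
	if err != nil {
		log.Fatal(err)
	}
	// dirs and files are both queued with AddItem; items are written out,
	// sorted and de-duplicated, when Write builds the cpio
	if err := a.AddItem("/etc", "/etc"); err != nil {
		log.Fatal(err)
	}
	if err := a.AddItem("/etc/fstab", "/etc/fstab"); err != nil {
		log.Fatal(err)
	}
	if err := a.Write("/tmp/initramfs", 0644); err != nil {
		log.Fatal(err)
	}
}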


@@ -674,7 +674,9 @@ func generateInitfs(name string, path string, kernVer string, devinfo deviceinfo
 		"/dev", "/tmp", "/lib", "/boot", "/sysroot", "/etc",
 	}
 	for _, dir := range requiredDirs {
-		initfsArchive.Dirs[dir] = false
+		if err := initfsArchive.AddItem(dir, dir); err != nil {
+			return err
+		}
 	}
 	if files, err := getInitfsFiles(devinfo); err != nil {


@@ -6,20 +6,23 @@ package archive
 import (
 	"bytes"
 	"compress/flate"
-	"github.com/cavaliercoder/go-cpio"
-	"github.com/klauspost/pgzip"
-	"gitlab.com/postmarketOS/postmarketos-mkinitfs/pkgs/misc"
 	"fmt"
 	"io"
 	"log"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
+	"sync"
+	"syscall"
+
+	"github.com/cavaliercoder/go-cpio"
+	"github.com/klauspost/pgzip"
+	"gitlab.com/postmarketOS/postmarketos-mkinitfs/pkgs/misc"
 )
 
 type Archive struct {
-	Dirs       misc.StringSet
-	Files      misc.StringSet
+	items      archiveItems
 	cpioWriter *cpio.Writer
 	buf        *bytes.Buffer
 }
@@ -28,8 +31,6 @@ func New() (*Archive, error) {
 	buf := new(bytes.Buffer)
 	archive := &Archive{
 		cpioWriter: cpio.NewWriter(buf),
-		Files:      make(misc.StringSet),
-		Dirs:       make(misc.StringSet),
 		buf:        buf,
 	}
@@ -41,6 +42,60 @@ type archiveItem struct {
 	header     *cpio.Header
 }
 
+type archiveItems struct {
+	items []archiveItem
+	sync.RWMutex
+}
+
+// Adds the given item to the archiveItems, only if it doesn't already exist in
+// the list. The items are kept sorted in ascending order.
+func (a *archiveItems) Add(item archiveItem) {
+	a.Lock()
+	defer a.Unlock()
+
+	if len(a.items) < 1 {
+		// empty list
+		a.items = append(a.items, item)
+		return
+	}
+
+	// find existing item, or index of where new item should go
+	i := sort.Search(len(a.items), func(i int) bool {
+		return strings.Compare(item.header.Name, a.items[i].header.Name) <= 0
+	})
+
+	if i >= len(a.items) {
+		// doesn't exist in list, but would be at the very end
+		a.items = append(a.items, item)
+		return
+	}
+
+	if strings.Compare(a.items[i].header.Name, item.header.Name) == 0 {
+		// already in list
+		return
+	}
+
+	// grow list by 1, shift right at index, and insert new string at index
+	a.items = append(a.items, archiveItem{})
+	copy(a.items[i+1:], a.items[i:])
+	a.items[i] = item
+}
+
+// iterate through items and send each one over the returned channel
+func (a *archiveItems) IterItems() <-chan archiveItem {
+	ch := make(chan archiveItem)
+	go func() {
+		a.RLock()
+		defer a.RUnlock()
+
+		for _, item := range a.items {
+			ch <- item
+		}
+		close(ch)
+	}()
+	return ch
+}
+
 func (archive *Archive) Write(path string, mode os.FileMode) error {
 	if err := archive.writeCpio(); err != nil {
 		return err
@@ -78,6 +133,11 @@ func (archive *Archive) AddItem(source string, dest string) error {
 	sourceStat, err := os.Lstat(source)
 	if err != nil {
+		e, ok := err.(*os.PathError)
+		if e.Err == syscall.ENOENT && ok {
+			// doesn't exist in current filesystem, assume it's a new directory
+			return archive.addDir(dest)
+		}
 		return fmt.Errorf("AddItem: failed to get stat for %q: %w", source, err)
 	}
@@ -93,11 +153,6 @@ func (archive *Archive) addFile(source string, dest string) error {
 		return err
 	}
 
-	if archive.Files[source] {
-		// Already written to cpio
-		return nil
-	}
-
 	sourceStat, err := os.Lstat(source)
 	if err != nil {
 		log.Print("addFile: failed to stat file: ", source)
@@ -114,21 +169,18 @@ func (archive *Archive) addFile(source string, dest string) error {
 	}
 
 	destFilename := strings.TrimPrefix(dest, "/")
-	hdr := &cpio.Header{
-		Name:     destFilename,
-		Linkname: target,
-		Mode:     0644 | cpio.ModeSymlink,
-		Size:     int64(len(target)),
-		// Checksum: 1,
-	}
-	if err := archive.cpioWriter.WriteHeader(hdr); err != nil {
-		return err
-	}
-	if _, err = archive.cpioWriter.Write([]byte(target)); err != nil {
-		return err
-	}
-	archive.Files[source] = true
+	archive.items.Add(archiveItem{
+		sourcePath: source,
+		header: &cpio.Header{
+			Name:     destFilename,
+			Linkname: target,
+			Mode:     0644 | cpio.ModeSymlink,
+			Size:     int64(len(target)),
+			// Checksum: 1,
+		},
+	})
 
 	if filepath.Dir(target) == "." {
 		target = filepath.Join(filepath.Dir(source), target)
 	}
@@ -146,30 +198,17 @@ func (archive *Archive) addFile(source string, dest string) error {
 		return err
 	}
 
-	// log.Printf("writing file: %q", file)
-	fd, err := os.Open(source)
-	if err != nil {
-		return err
-	}
-	defer fd.Close()
-
 	destFilename := strings.TrimPrefix(dest, "/")
-	hdr := &cpio.Header{
-		Name: destFilename,
-		Mode: cpio.FileMode(sourceStat.Mode().Perm()),
-		Size: sourceStat.Size(),
-		// Checksum: 1,
-	}
-	if err := archive.cpioWriter.WriteHeader(hdr); err != nil {
-		return err
-	}
-	if _, err = io.Copy(archive.cpioWriter, fd); err != nil {
-		return err
-	}
-	archive.Files[source] = true
+	archive.items.Add(archiveItem{
+		sourcePath: source,
+		header: &cpio.Header{
+			Name: destFilename,
+			Mode: cpio.FileMode(sourceStat.Mode().Perm()),
+			Size: sourceStat.Size(),
+			// Checksum: 1,
+		},
+	})
 
 	return nil
 }
@@ -207,29 +246,48 @@ func (archive *Archive) writeCompressed(path string, mode os.FileMode) error {
 }
 
 func (archive *Archive) writeCpio() error {
-	// Write any dirs added explicitly
-	for dir := range archive.Dirs {
-		archive.addDir(dir)
-	}
-
-	// Write files and any missing parent dirs
-	for file, imported := range archive.Files {
-		if imported {
-			continue
-		}
-		if err := archive.addFile(file, file); err != nil {
+	// having a transient function for actually adding files to the archive
+	// allows the deferred fd.close to run after every copy and prevent having
+	// tons of open file handles until the copying is all done
+	copyToArchive := func(source string, header *cpio.Header) error {
+		if err := archive.cpioWriter.WriteHeader(header); err != nil {
+			return fmt.Errorf("archive.writeCpio: unable to write header: %w", err)
+		}
+
+		// don't copy actual dirs into the archive, writing the header is enough
+		if !header.Mode.IsDir() {
+			if header.Mode.IsRegular() {
+				fd, err := os.Open(source)
+				if err != nil {
+					return fmt.Errorf("archive.writeCpio: uname to open file %q, %w", source, err)
+				}
+				defer fd.Close()
+				if _, err := io.Copy(archive.cpioWriter, fd); err != nil {
+					return fmt.Errorf("archive.writeCpio: unable to write out archive: %w", err)
+				}
+			} else if header.Linkname != "" {
+				// the contents of a symlink is just need the link name
+				if _, err := archive.cpioWriter.Write([]byte(header.Linkname)); err != nil {
+					return fmt.Errorf("archive.writeCpio: unable to write out symlink: %w", err)
+				}
+			} else {
+				return fmt.Errorf("archive.writeCpio: unknown type for file: %s", source)
+			}
+		}
+
+		return nil
+	}
+
+	for i := range archive.items.IterItems() {
+		if err := copyToArchive(i.sourcePath, i.header); err != nil {
 			return err
 		}
 	}
 
 	return nil
 }
 
 func (archive *Archive) addDir(dir string) error {
-	if archive.Dirs[dir] {
-		// Already imported
-		return nil
-	}
-
 	if dir == "/" {
 		dir = "."
 	}
@@ -237,19 +295,13 @@ func (archive *Archive) addDir(dir string) error {
 	subdirs := strings.Split(strings.TrimPrefix(dir, "/"), "/")
 	for i, subdir := range subdirs {
 		path := filepath.Join(strings.Join(subdirs[:i], "/"), subdir)
-		if archive.Dirs[path] {
-			// Subdir already imported
-			continue
-		}
-		err := archive.cpioWriter.WriteHeader(&cpio.Header{
-			Name: path,
-			Mode: cpio.ModeDir | 0755,
-		})
-		if err != nil {
-			return err
-		}
-		archive.Dirs[path] = true
-		// log.Print("wrote dir: ", path)
+		archive.items.Add(archiveItem{
+			sourcePath: path,
+			header: &cpio.Header{
+				Name: path,
+				Mode: cpio.ModeDir | 0755,
+			},
+		})
 	}
 
 	return nil
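
The rewritten writeCpio wraps the per-item copy in a small local function (copyToArchive) purely so that the deferred fd.Close() fires after every item, instead of every handle staying open until the whole archive is written. A standalone sketch of that pattern, with copyAll and copyOne as hypothetical names not taken from the diff:

package example

import (
	"io"
	"os"
)

// copyAll shows the same trick writeCpio uses: the per-file work lives in its
// own function, so the deferred fd.Close() runs once per file instead of
// piling up until the outer function returns.
func copyAll(dst io.Writer, paths []string) error {
	copyOne := func(path string) error {
		fd, err := os.Open(path)
		if err != nil {
			return err
		}
		defer fd.Close() // runs when copyOne returns, i.e. after each copy
		_, err = io.Copy(dst, fd)
		return err
	}

	for _, p := range paths {
		if err := copyOne(p); err != nil {
			return err
		}
	}
	return nil
}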


@@ -0,0 +1,189 @@
+// Copyright 2022 Clayton Craft <clayton@craftyguy.net>
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package archive
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/cavaliercoder/go-cpio"
+)
+
+func TestArchiveItemsAdd(t *testing.T) {
+	subtests := []struct {
+		name     string
+		inItems  []archiveItem
+		inItem   archiveItem
+		expected []archiveItem
+	}{
+		{
+			name:    "empty list",
+			inItems: []archiveItem{},
+			inItem: archiveItem{
+				sourcePath: "/foo/bar",
+				header:     &cpio.Header{Name: "/foo/bar"},
+			},
+			expected: []archiveItem{
+				{
+					sourcePath: "/foo/bar",
+					header:     &cpio.Header{Name: "/foo/bar"},
+				},
+			},
+		},
+		{
+			name: "already exists",
+			inItems: []archiveItem{
+				{
+					sourcePath: "/bazz/bar",
+					header:     &cpio.Header{Name: "/bazz/bar"},
+				},
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+				{
+					sourcePath: "/foo/bar",
+					header:     &cpio.Header{Name: "/foo/bar"},
+				},
+			},
+			inItem: archiveItem{
+				sourcePath: "/foo",
+				header:     &cpio.Header{Name: "/foo"},
+			},
+			expected: []archiveItem{
+				{
+					sourcePath: "/bazz/bar",
+					header:     &cpio.Header{Name: "/bazz/bar"},
+				},
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+				{
+					sourcePath: "/foo/bar",
+					header:     &cpio.Header{Name: "/foo/bar"},
+				},
+			},
+		},
+		{
+			name: "add new",
+			inItems: []archiveItem{
+				{
+					sourcePath: "/bazz/bar",
+					header:     &cpio.Header{Name: "/bazz/bar"},
+				},
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+				{
+					sourcePath: "/foo/bar",
+					header:     &cpio.Header{Name: "/foo/bar"},
+				},
+				{
+					sourcePath: "/foo/bar1",
+					header:     &cpio.Header{Name: "/foo/bar1"},
+				},
+			},
+			inItem: archiveItem{
+				sourcePath: "/foo/bar0",
+				header:     &cpio.Header{Name: "/foo/bar0"},
+			},
+			expected: []archiveItem{
+				{
+					sourcePath: "/bazz/bar",
+					header:     &cpio.Header{Name: "/bazz/bar"},
+				},
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+				{
+					sourcePath: "/foo/bar",
+					header:     &cpio.Header{Name: "/foo/bar"},
+				},
+				{
+					sourcePath: "/foo/bar0",
+					header:     &cpio.Header{Name: "/foo/bar0"},
+				},
+				{
+					sourcePath: "/foo/bar1",
+					header:     &cpio.Header{Name: "/foo/bar1"},
+				},
+			},
+		},
+		{
+			name: "add new at beginning",
+			inItems: []archiveItem{
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+				{
+					sourcePath: "/foo/bar",
+					header:     &cpio.Header{Name: "/foo/bar"},
+				},
+			},
+			inItem: archiveItem{
+				sourcePath: "/bazz/bar",
+				header:     &cpio.Header{Name: "/bazz/bar"},
+			},
+			expected: []archiveItem{
+				{
+					sourcePath: "/bazz/bar",
+					header:     &cpio.Header{Name: "/bazz/bar"},
+				},
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+				{
+					sourcePath: "/foo/bar",
+					header:     &cpio.Header{Name: "/foo/bar"},
+				},
+			},
+		},
+		{
+			name: "add new at end",
+			inItems: []archiveItem{
+				{
+					sourcePath: "/bazz/bar",
+					header:     &cpio.Header{Name: "/bazz/bar"},
+				},
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+			},
+			inItem: archiveItem{
+				sourcePath: "/zzz/bazz",
+				header:     &cpio.Header{Name: "/zzz/bazz"},
+			},
+			expected: []archiveItem{
+				{
+					sourcePath: "/bazz/bar",
+					header:     &cpio.Header{Name: "/bazz/bar"},
+				},
+				{
+					sourcePath: "/foo",
+					header:     &cpio.Header{Name: "/foo"},
+				},
+				{
+					sourcePath: "/zzz/bazz",
+					header:     &cpio.Header{Name: "/zzz/bazz"},
+				},
+			},
+		},
+	}
+
+	for _, st := range subtests {
+		t.Run(st.name, func(t *testing.T) {
+			a := archiveItems{items: st.inItems}
+			a.Add(st.inItem)
+			if !reflect.DeepEqual(st.expected, a.items) {
+				t.Fatal("expected:", st.expected, " got: ", a.items)
+			}
+		})
+	}
+}
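
The subtests above all exercise the same sort.Search-based insertion that archiveItems.Add implements. For reference, the same algorithm reduced to a plain []string; insertSorted is a hypothetical illustration, not code from this commit:

package example

import (
	"sort"
	"strings"
)

// insertSorted is a simplified stand-in for archiveItems.Add: it keeps names
// sorted in ascending order and drops duplicates, using sort.Search to find
// the insertion index in O(log n).
func insertSorted(names []string, name string) []string {
	i := sort.Search(len(names), func(i int) bool {
		return strings.Compare(name, names[i]) <= 0
	})
	if i < len(names) && names[i] == name {
		return names // already present, same as Add's early return
	}
	names = append(names, "")    // grow by one
	copy(names[i+1:], names[i:]) // shift the tail right
	names[i] = name              // insert at the found index
	return names
}

For example, insertSorted([]string{"/bazz/bar", "/foo"}, "/foo/bar") yields ["/bazz/bar", "/foo", "/foo/bar"], matching the ordering the "add new" cases expect.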