From a4ea980f2d023f22c7eede4c142190115449de5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20Alberto=20Gim=C3=A9nez?= Date: Mon, 14 Sep 2020 20:44:56 +0200 Subject: [PATCH] Add tarfs implementation (reopen #265) (#266) * Initial commit for tarfs * tarfs: reword "open" status field * tarfs: use TestMain for FS setup We want to have the FS variable available through all the tests, so we use a common "setup" function to initialise it. * tarfs: test: early exit for nonexisting files * tarfs: create test for filesystem Open * tarfs: implement File.Stat * tarfs: implement Fs.Open * tarfs: return error on non-supported methods As tarfs is a read-only filesystem backend, we return EROFS (Read-only file system) from any method that makes modifications. * tarfs: implement File.data as bytes.Reader Most of the operations that we want to implement for tarfs.File are already defined in bytes.Reader. We could use a plain slice and implement all the seeking manually, but I think using this is more convenient. * tarfs: short format for simple methods * tarfs: add missing closing brace in tests * tarfs: add test for File.ReadAt * tarfs: test File.ReadAt * tarfs: add tests for File.Read * tarfs: implement File.Read * tarfs: add tests for File.Seek * tarfs: implement File.Seek * tarfs: add tests for File.Name * tarfs: implement File.Name * tarfs: add tests for File.Close * tarfs: implement File.Close * tarfs: add tests for OpenFile * tarfs: fix test for Fs.OpenFile If the call fails, we don't have to close the file * tarfs: remove code not needed after using filepath.Clean * tarfs: Open: return a copy of the internal structure As we modify the struct fields when closing, we don't want to lose the internal representation of the file, in case we want to reopen it. Return a copy of the File, although we keep using the same pointers to tar.Header and bytes.Reader. Maybe we will need to change that in the future. 
* tarfs: implement Fs.OpenFile * tarfs: use Fatalf for unexpected error in TestFsOpen * tarfs: add tests for Fs.Stat * tarfs: implement Fs.Stat * tarfs: remove TestNewFs That test depends too much on the internal implementation, and it is easy to break if we change it. * tarfs: remove unused code * tarfs: change internal implementation The naive single-map implementation makes directory handling (File.Readdir, File.Readdirnames) a bit harder to implement. Inspired by the zipfs backend, switch to an internal implementation of a map of directories that contains a map of files, so the directory methods are easier to implement. Also, treat the "virtual" filesystem as absolute, just like zipfs does. * tarfs: use Fatal errors to avoid panics * tarfs: add pseudoroot * tarfs: add tests for File.Readdir * tarfs: add pointer Fs in the File structure For directory-related operations we will need to access the internal structure in the Fs. As Readdir and Readdirnames are File methods, we need to access that structure from the File. * tarfs: fix error * tarfs: use just the names for TestReaddir, easier than using full os.FileInfo entries * tarfs: create a copy of the original entry when opening a file We added the fs field in the File struct to reference the underlying Fs object, but in the Open call we were not passing it, so all the opened files had a nil pointer in that field. 
Change to make a copy of the original file, and returning that * tarfs: implement File.Readdir * tarfs: add tests for File.Readdirnames * tarfs: implement Readdirnames * tarfs: add test for File.Name * tarfs: change tests to use the Afero interface instead * tarfs: add tests for Glob from zipfs * tarfs: update main repo references to tarfs * tarfs: use OS-specific file separator for pseudoroot * tarfs: fix path handling in Windows systems --- .travis.yml | 2 +- README.md | 1 - tarfs/file.go | 144 ++++++++++++++++ tarfs/fs.go | 134 +++++++++++++++ tarfs/tarfs_test.go | 402 +++++++++++++++++++++++++++++++++++++++++++ tarfs/testdata/t.tar | Bin 0 -> 30720 bytes 6 files changed, 681 insertions(+), 2 deletions(-) create mode 100644 tarfs/file.go create mode 100644 tarfs/fs.go create mode 100644 tarfs/tarfs_test.go create mode 100644 tarfs/testdata/t.tar diff --git a/.travis.yml b/.travis.yml index fdaa999..1459644 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,4 +19,4 @@ script: - go build -v ./... - go test -count=1 -cover -race -v ./... - go vet ./... - - FILES=$(gofmt -s -l . zipfs sftpfs mem); if [[ -n "${FILES}" ]]; then echo "You have go format errors; gofmt your changes"; exit 1; fi + - FILES=$(gofmt -s -l . 
zipfs sftpfs mem tarfs); if [[ -n "${FILES}" ]]; then echo "You have go format errors; gofmt your changes"; exit 1; fi diff --git a/README.md b/README.md index acd4930..c3e807a 100644 --- a/README.md +++ b/README.md @@ -380,7 +380,6 @@ The following is a short list of possible backends we hope someone will implement: * SSH -* TAR * S3 # About the project diff --git a/tarfs/file.go b/tarfs/file.go new file mode 100644 index 0000000..e1d63ed --- /dev/null +++ b/tarfs/file.go @@ -0,0 +1,144 @@ +package tarfs + +import ( + "archive/tar" + "bytes" + "os" + "path/filepath" + "sort" + "syscall" + + "github.com/spf13/afero" +) + +type File struct { + h *tar.Header + data *bytes.Reader + closed bool + fs *Fs +} + +func (f *File) Close() error { + if f.closed { + return afero.ErrFileClosed + } + + f.closed = true + f.h = nil + f.data = nil + f.fs = nil + + return nil +} + +func (f *File) Read(p []byte) (n int, err error) { + if f.closed { + return 0, afero.ErrFileClosed + } + + if f.h.Typeflag == tar.TypeDir { + return 0, syscall.EISDIR + } + + return f.data.Read(p) +} + +func (f *File) ReadAt(p []byte, off int64) (n int, err error) { + if f.closed { + return 0, afero.ErrFileClosed + } + + if f.h.Typeflag == tar.TypeDir { + return 0, syscall.EISDIR + } + + return f.data.ReadAt(p, off) +} + +func (f *File) Seek(offset int64, whence int) (int64, error) { + if f.closed { + return 0, afero.ErrFileClosed + } + + if f.h.Typeflag == tar.TypeDir { + return 0, syscall.EISDIR + } + + return f.data.Seek(offset, whence) +} + +func (f *File) Write(p []byte) (n int, err error) { return 0, syscall.EROFS } + +func (f *File) WriteAt(p []byte, off int64) (n int, err error) { return 0, syscall.EROFS } + +func (f *File) Name() string { + return filepath.Join(splitpath(f.h.Name)) +} + +func (f *File) getDirectoryNames() ([]string, error) { + d, ok := f.fs.files[f.Name()] + if !ok { + return nil, &os.PathError{Op: "readdir", Path: f.Name(), Err: syscall.ENOENT} + } + + var names []string + for n 
:= range d { + names = append(names, n) + } + sort.Strings(names) + + return names, nil +} + +func (f *File) Readdir(count int) ([]os.FileInfo, error) { + if f.closed { + return nil, afero.ErrFileClosed + } + + if !f.h.FileInfo().IsDir() { + return nil, syscall.ENOTDIR + } + + names, err := f.getDirectoryNames() + if err != nil { + return nil, err + } + + d := f.fs.files[f.Name()] + var fi []os.FileInfo + for _, n := range names { + if n == "" { + continue + } + + f := d[n] + fi = append(fi, f.h.FileInfo()) + if count > 0 && len(fi) >= count { + break + } + } + + return fi, nil +} + +func (f *File) Readdirnames(n int) ([]string, error) { + fi, err := f.Readdir(n) + if err != nil { + return nil, err + } + + var names []string + for _, f := range fi { + names = append(names, f.Name()) + } + + return names, nil +} + +func (f *File) Stat() (os.FileInfo, error) { return f.h.FileInfo(), nil } + +func (f *File) Sync() error { return nil } + +func (f *File) Truncate(size int64) error { return syscall.EROFS } + +func (f *File) WriteString(s string) (ret int, err error) { return 0, syscall.EROFS } diff --git a/tarfs/fs.go b/tarfs/fs.go new file mode 100644 index 0000000..ba3212b --- /dev/null +++ b/tarfs/fs.go @@ -0,0 +1,134 @@ +// package tarfs implements a read-only in-memory representation of a tar archive +package tarfs + +import ( + "archive/tar" + "bytes" + "io" + "os" + "path/filepath" + "syscall" + "time" + + "github.com/spf13/afero" +) + +type Fs struct { + files map[string]map[string]*File +} + +func splitpath(name string) (dir, file string) { + name = filepath.ToSlash(name) + if len(name) == 0 || name[0] != '/' { + name = "/" + name + } + name = filepath.Clean(name) + dir, file = filepath.Split(name) + dir = filepath.Clean(dir) + return +} + +func New(t *tar.Reader) *Fs { + fs := &Fs{files: make(map[string]map[string]*File)} + for { + hdr, err := t.Next() + if err == io.EOF { + break + } + if err != nil { + return nil + } + + d, f := splitpath(hdr.Name) + if _, ok 
:= fs.files[d]; !ok { + fs.files[d] = make(map[string]*File) + } + + var buf bytes.Buffer + size, err := buf.ReadFrom(t) + if err != nil { + panic("tarfs: reading from tar:" + err.Error()) + } + + if size != hdr.Size { + panic("tarfs: size mismatch") + } + + file := &File{ + h: hdr, + data: bytes.NewReader(buf.Bytes()), + fs: fs, + } + fs.files[d][f] = file + + } + + // Add a pseudoroot + fs.files[afero.FilePathSeparator][""] = &File{ + h: &tar.Header{ + Name: afero.FilePathSeparator, + Typeflag: tar.TypeDir, + Size: 0, + }, + data: bytes.NewReader(nil), + fs: fs, + } + + return fs +} + +func (fs *Fs) Open(name string) (afero.File, error) { + d, f := splitpath(name) + if _, ok := fs.files[d]; !ok { + return nil, &os.PathError{Op: "open", Path: name, Err: syscall.ENOENT} + } + + file, ok := fs.files[d][f] + if !ok { + return nil, &os.PathError{Op: "open", Path: name, Err: syscall.ENOENT} + } + + nf := *file + + return &nf, nil +} + +func (fs *Fs) Name() string { return "tarfs" } + +func (fs *Fs) Create(name string) (afero.File, error) { return nil, syscall.EROFS } + +func (fs *Fs) Mkdir(name string, perm os.FileMode) error { return syscall.EROFS } + +func (fs *Fs) MkdirAll(path string, perm os.FileMode) error { return syscall.EROFS } + +func (fs *Fs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) { + if flag != os.O_RDONLY { + return nil, &os.PathError{Op: "open", Path: name, Err: syscall.EPERM} + } + + return fs.Open(name) +} + +func (fs *Fs) Remove(name string) error { return syscall.EROFS } + +func (fs *Fs) RemoveAll(path string) error { return syscall.EROFS } + +func (fs *Fs) Rename(oldname string, newname string) error { return syscall.EROFS } + +func (fs *Fs) Stat(name string) (os.FileInfo, error) { + d, f := splitpath(name) + if _, ok := fs.files[d]; !ok { + return nil, &os.PathError{Op: "stat", Path: name, Err: syscall.ENOENT} + } + + file, ok := fs.files[d][f] + if !ok { + return nil, &os.PathError{Op: "stat", Path: name, Err: 
syscall.ENOENT} + } + + return file.h.FileInfo(), nil +} + +func (fs *Fs) Chmod(name string, mode os.FileMode) error { return syscall.EROFS } + +func (fs *Fs) Chtimes(name string, atime time.Time, mtime time.Time) error { return syscall.EROFS } diff --git a/tarfs/tarfs_test.go b/tarfs/tarfs_test.go new file mode 100644 index 0000000..8d5eaab --- /dev/null +++ b/tarfs/tarfs_test.go @@ -0,0 +1,402 @@ +// Most of the tests are stolen from the zipfs implementation +package tarfs + +import ( + "archive/tar" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "reflect" + "syscall" + "testing" + + "github.com/spf13/afero" +) + +var files = []struct { + name string + exists bool + isdir bool + size int64 + content string + contentAt4k string +}{ + {"/", true, true, 0, "", ""}, + {"/sub", true, true, 0, "", ""}, + {"/sub/testDir2", true, true, 0, "", ""}, + {"/sub/testDir2/testFile", true, false, 8192, "cccccccc", "ccccdddd"}, + {"/testFile", true, false, 8192, "aaaaaaaa", "aaaabbbb"}, + {"/testDir1/testFile", true, false, 8192, "bbbbbbbb", "bbbbcccc"}, + + {"/nonExisting", false, false, 0, "", ""}, +} + +var dirs = []struct { + name string + children []string +}{ + {"/", []string{"sub", "testDir1", "testFile"}}, + {"/sub", []string{"testDir2"}}, + {"/sub/testDir2", []string{"testFile"}}, + {"/testDir1", []string{"testFile"}}, +} + +var afs *afero.Afero + +func TestMain(m *testing.M) { + tf, err := os.Open("testdata/t.tar") + if err != nil { + fmt.Print(err) + os.Exit(1) + } + + tfs := New(tar.NewReader(tf)) + afs = &afero.Afero{Fs: tfs} + os.Exit(m.Run()) +} + +func TestFsOpen(t *testing.T) { + for _, f := range files { + file, err := afs.Open(f.name) + if (err == nil) != f.exists { + t.Errorf("%v exists = %v, but got err = %v", f.name, f.exists, err) + } + + if !f.exists { + continue + } + if err != nil { + t.Fatalf("%v: %v", f.name, err) + } + + if file.Name() != filepath.FromSlash(f.name) { + t.Errorf("Name(), got %v, expected %v", file.Name(), 
filepath.FromSlash(f.name)) + } + + s, err := file.Stat() + if err != nil { + t.Fatalf("stat %v: got error '%v'", file.Name(), err) + } + + if isdir := s.IsDir(); isdir != f.isdir { + t.Errorf("%v directory, got: %v, expected: %v", file.Name(), isdir, f.isdir) + } + + if size := s.Size(); size != f.size { + t.Errorf("%v size, got: %v, expected: %v", file.Name(), size, f.size) + } + } +} + +func TestRead(t *testing.T) { + for _, f := range files { + if !f.exists { + continue + } + + file, err := afs.Open(f.name) + if err != nil { + t.Fatalf("opening %v: %v", f.name, err) + } + + buf := make([]byte, 8) + n, err := file.Read(buf) + if err != nil { + if f.isdir && (err != syscall.EISDIR) { + t.Errorf("%v got error %v, expected EISDIR", f.name, err) + } else if !f.isdir { + t.Errorf("%v: %v", f.name, err) + } + } else if n != 8 { + t.Errorf("%v: got %d read bytes, expected 8", f.name, n) + } else if string(buf) != f.content { + t.Errorf("%v: got <%s>, expected <%s>", f.name, f.content, string(buf)) + } + + } +} + +func TestReadAt(t *testing.T) { + for _, f := range files { + if !f.exists { + continue + } + + file, err := afs.Open(f.name) + if err != nil { + t.Fatalf("opening %v: %v", f.name, err) + } + + buf := make([]byte, 8) + n, err := file.ReadAt(buf, 4092) + if err != nil { + if f.isdir && (err != syscall.EISDIR) { + t.Errorf("%v got error %v, expected EISDIR", f.name, err) + } else if !f.isdir { + t.Errorf("%v: %v", f.name, err) + } + } else if n != 8 { + t.Errorf("%v: got %d read bytes, expected 8", f.name, n) + } else if string(buf) != f.contentAt4k { + t.Errorf("%v: got <%s>, expected <%s>", f.name, f.contentAt4k, string(buf)) + } + + } +} + +func TestSeek(t *testing.T) { + for _, f := range files { + if !f.exists { + continue + } + + file, err := afs.Open(f.name) + if err != nil { + t.Fatalf("opening %v: %v", f.name, err) + } + + var tests = []struct { + offin int64 + whence int + offout int64 + }{ + {0, io.SeekStart, 0}, + {10, io.SeekStart, 10}, + {1, 
io.SeekCurrent, 11}, + {10, io.SeekCurrent, 21}, + {0, io.SeekEnd, f.size}, + {-1, io.SeekEnd, f.size - 1}, + } + + for _, s := range tests { + n, err := file.Seek(s.offin, s.whence) + if err != nil { + if f.isdir && err == syscall.EISDIR { + continue + } + + t.Errorf("%v: %v", f.name, err) + } + + if n != s.offout { + t.Errorf("%v: (off: %v, whence: %v): got %v, expected %v", f.name, s.offin, s.whence, n, s.offout) + } + } + + } +} + +func TestName(t *testing.T) { + for _, f := range files { + if !f.exists { + continue + } + + file, err := afs.Open(f.name) + if err != nil { + t.Fatalf("opening %v: %v", f.name, err) + } + + n := file.Name() + if n != filepath.FromSlash(f.name) { + t.Errorf("got: %v, expected: %v", n, filepath.FromSlash(f.name)) + } + + } +} + +func TestClose(t *testing.T) { + for _, f := range files { + if !f.exists { + continue + } + + file, err := afs.Open(f.name) + if err != nil { + t.Fatalf("opening %v: %v", f.name, err) + } + + err = file.Close() + if err != nil { + t.Errorf("%v: %v", f.name, err) + } + + err = file.Close() + if err == nil { + t.Errorf("%v: closing twice should return an error", f.name) + } + + buf := make([]byte, 8) + n, err := file.Read(buf) + if n != 0 || err == nil { + t.Errorf("%v: could read from a closed file", f.name) + } + + n, err = file.ReadAt(buf, 256) + if n != 0 || err == nil { + t.Errorf("%v: could readAt from a closed file", f.name) + } + + off, err := file.Seek(0, io.SeekStart) + if off != 0 || err == nil { + t.Errorf("%v: could seek from a closed file", f.name) + } + } +} + +func TestOpenFile(t *testing.T) { + for _, f := range files { + file, err := afs.OpenFile(f.name, os.O_RDONLY, 0400) + if !f.exists { + if !errors.Is(err, syscall.ENOENT) { + t.Errorf("%v: got %v, expected%v", f.name, err, syscall.ENOENT) + } + + continue + } + + if err != nil { + t.Fatalf("%v: %v", f.name, err) + } + file.Close() + + file, err = afs.OpenFile(f.name, os.O_CREATE, 0600) + if !errors.Is(err, syscall.EPERM) { + t.Errorf("%v: 
open for write: got %v, expected %v", f.name, err, syscall.EPERM) + } + + } +} + +func TestFsStat(t *testing.T) { + for _, f := range files { + fi, err := afs.Stat(f.name) + if !f.exists { + if !errors.Is(err, syscall.ENOENT) { + t.Errorf("%v: got %v, expected%v", f.name, err, syscall.ENOENT) + } + + continue + } + + if err != nil { + t.Fatalf("stat %v: got error '%v'", f.name, err) + } + + if isdir := fi.IsDir(); isdir != f.isdir { + t.Errorf("%v directory, got: %v, expected: %v", f.name, isdir, f.isdir) + } + + if size := fi.Size(); size != f.size { + t.Errorf("%v size, got: %v, expected: %v", f.name, size, f.size) + } + } +} + +func TestReaddir(t *testing.T) { + for _, d := range dirs { + dir, err := afs.Open(d.name) + if err != nil { + t.Fatal(err) + } + + fi, err := dir.Readdir(0) + if err != nil { + t.Fatal(err) + } + var names []string + for _, f := range fi { + names = append(names, f.Name()) + } + + if !reflect.DeepEqual(names, d.children) { + t.Errorf("%v: children, got '%v', expected '%v'", d.name, names, d.children) + } + + fi, err = dir.Readdir(1) + if err != nil { + t.Fatal(err) + } + + names = []string{} + for _, f := range fi { + names = append(names, f.Name()) + } + + if !reflect.DeepEqual(names, d.children[0:1]) { + t.Errorf("%v: children, got '%v', expected '%v'", d.name, names, d.children[0:1]) + } + } + + dir, err := afs.Open("/testFile") + if err != nil { + t.Fatal(err) + } + + _, err = dir.Readdir(-1) + if err != syscall.ENOTDIR { + t.Fatal("Expected error") + } +} + +func TestReaddirnames(t *testing.T) { + for _, d := range dirs { + dir, err := afs.Open(d.name) + if err != nil { + t.Fatal(err) + } + + names, err := dir.Readdirnames(0) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(names, d.children) { + t.Errorf("%v: children, got '%v', expected '%v'", d.name, names, d.children) + } + + names, err = dir.Readdirnames(1) + if err != nil { + t.Fatal(err) + } + + if !reflect.DeepEqual(names, d.children[0:1]) { + t.Errorf("%v: 
children, got '%v', expected '%v'", d.name, names, d.children[0:1]) + } + } + + dir, err := afs.Open("/testFile") + if err != nil { + t.Fatal(err) + } + + _, err = dir.Readdir(-1) + if err != syscall.ENOTDIR { + t.Fatal("Expected error") + } +} + +func TestGlob(t *testing.T) { + for _, s := range []struct { + glob string + entries []string + }{ + {filepath.FromSlash("/*"), []string{filepath.FromSlash("/sub"), filepath.FromSlash("/testDir1"), filepath.FromSlash("/testFile")}}, + {filepath.FromSlash("*"), []string{filepath.FromSlash("sub"), filepath.FromSlash("testDir1"), filepath.FromSlash("testFile")}}, + {filepath.FromSlash("sub/*"), []string{filepath.FromSlash("sub/testDir2")}}, + {filepath.FromSlash("sub/testDir2/*"), []string{filepath.FromSlash("sub/testDir2/testFile")}}, + {filepath.FromSlash("testDir1/*"), []string{filepath.FromSlash("testDir1/testFile")}}, + } { + entries, err := afero.Glob(afs.Fs, s.glob) + if err != nil { + t.Error(err) + } + if reflect.DeepEqual(entries, s.entries) { + t.Logf("glob: %s: glob ok", s.glob) + } else { + t.Errorf("glob: %s: got %#v, expected %#v", s.glob, entries, s.entries) + } + } +} diff --git a/tarfs/testdata/t.tar b/tarfs/testdata/t.tar new file mode 100644 index 0000000000000000000000000000000000000000..d5b9aa0fb5ebdcc5f85b3921202f8b4d5d513af2 GIT binary patch literal 30720 zcmeI)OHPA800v-=(i;dfJT73oM_Y+$qAvQ_czS1y5>04i0j9xkHZ)8+!1oWkKb$VR zyJF!{rqMM`kq%YYl;4keDvzRyp^Rl6n>Ni?p$xIuGz;I?$MTocd3)S!itW?krGM?; zu3huD`D_2X@$;vY|G7V%?+?eY)JDhwPt3m#U7hleO=#u+7hC5?{&^!j9G?4`FP!sl z>y+#0`ycZ3@cy?kru>^wrToj8?_+eifBydO4FUuR5FkK+009C72oNAZVD$oz1_1&D z2oNAZfB*pk1PBlyuzG>d?oYK;_a}EQa=~MA|ECSP|JB7x|7WzXGV{qH|Ao3gU-?h& z|CC+Izpg{0|1)#cqs#3K0t5&UAV7cs0RjXF5FkKc^#ZFeIC($<1PBlyK!5-N0t5&U zAV6ThVC`e*8|41a#QsMaWA1;1E`R^Y_g@CW#eQrJ0t5&UAV7cs0RjXF5FkKc^#ZFe qxOhMU1PBlyK!5-N0t5&UAV6TZSh2vu1PBlyK!5-N0t5)$oWMKSbAGu1 literal 0 HcmV?d00001