mgmt/etcd/fs/file.go

// Mgmt
// Copyright (C) James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// Additional permission under GNU GPL version 3 section 7
//
// If you modify this program, or any covered work, by linking or combining it
// with embedded mcl code and modules (and that the embedded mcl code and
// modules which link with this program, contain a copy of their source code in
// the authoritative form) containing parts covered by the terms of any other
// license, the licensors of this program grant you additional permission to
// convey the resulting work. Furthermore, the licensors of this program grant
// the original author, James Shubin, additional permission to update this
// additional permission if he deems it necessary to achieve the goals of this
// additional permission.
package fs
import (
"bytes"
"encoding/gob"
"fmt"
"io"
"os"
"path"
"strings"
"syscall"
"time"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "go.etcd.io/etcd/client/v3"
etcdutil "go.etcd.io/etcd/client/v3/clientv3util"
)
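// init registers the File type with gob so that file nodes (and their
// children) can be encoded and decoded.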
func init() {
gob.Register(&File{})
}
// File represents a file node. This is the node of our tree structure. This is
// not thread safe, and you can have at most one open file handle at a time.
type File struct {
// FIXME: add a rwmutex to make this thread safe
fs *Fs // pointer to file system
Path string // relative path to file, trailing slash if it's a directory
Mode os.FileMode
ModTime time.Time
//Size int64 // XXX: cache the size to avoid full file downloads for stat!
Children []*File // dir's use this
Hash string // string not []byte so it's readable, matches data
data []byte // cache of the data. private so it doesn't get encoded
cursor int64
dirCursor int64
readOnly bool // is the file read-only?
closed bool // is file closed?
}
// path returns the expected path to the actual file in etcd.
func (obj *File) path() string {
// keys are prefixed with the hash-type eg: {sha256} to allow different
// superblocks to share the same data prefix even with different hashes
return fmt.Sprintf("%s/{%s}%s", obj.fs.sb.DataPrefix, obj.fs.Hash, obj.Hash)
}
// cache downloads the file contents from etcd and stores them in our cache.
func (obj *File) cache() error {
if obj.Mode.IsDir() {
return nil
}
h, err := obj.fs.hash(obj.data) // update hash
if err != nil {
return err
}
if h == obj.Hash { // we already have the correct data cached
return nil
}
p := obj.path() // get file data from this path in etcd
result, err := obj.fs.get(p) // download the file...
if err != nil {
return err
}
if result == nil || len(result) == 0 { // nothing found
return fmt.Errorf("could not get file data")
}
data, exists := result[p]
if !exists {
return fmt.Errorf("could not find data") // programming error?
}
obj.data = data // save
return nil
}
// findNode is the "in array" equivalent for searching through a dir's children.
// You must *not* specify an absolute path as the search string, but rather you
// should specify the name. To search for something named "bar" inside a dir
// named "/tmp/foo/", you just pass in "bar", not "/tmp/foo/bar".
func (obj *File) findNode(name string) (*File, bool) {
for _, node := range obj.Children {
if name == node.Path {
return node, true // found
}
}
return nil, false // not found
}
func fileCreate(fs *Fs, name string) (*File, error) {
if name == "" {
return nil, fmt.Errorf("invalid input path")
}
if !strings.HasPrefix(name, "/") {
return nil, fmt.Errorf("invalid input path (not absolute)")
}
cleanPath := path.Clean(name) // remove possible trailing slashes
// try to add node to tree by first finding the parent node
parentPath, filePath := path.Split(cleanPath) // looking for this
node, err := fs.find(parentPath)
if err != nil { // might be ErrNotExist
return nil, err
}
fi, err := node.Stat()
if err != nil {
return nil, err
}
if !fi.IsDir() { // is the parent a suitable home?
return nil, &os.PathError{Op: "create", Path: name, Err: syscall.ENOTDIR}
}
f, exists := node.findNode(filePath) // does file already exist inside?
if exists { // already exists, overwrite!
if err := f.Truncate(0); err != nil {
return nil, err
}
return f, nil
}
data := []byte("") // empty file contents
h, err := fs.hash(data) // TODO: use memoized value?
if err != nil {
return &File{}, err // TODO: nil instead?
}
f = &File{
fs: fs,
Path: filePath, // the relative path chunk (not incl. dir name)
Hash: h,
data: data,
}
// add to parent
node.Children = append(node.Children, f)
// push new file up if not on server, and then push up the metadata
if err := f.Sync(); err != nil {
return f, err // TODO: ok to return the file so user can run sync?
}
return f, nil
}
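// fileOpen opens the file node at the given absolute path as read-only, and
// downloads its contents into the local cache.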
func fileOpen(fs *Fs, name string) (*File, error) {
if name == "" {
return nil, fmt.Errorf("invalid input path")
}
if !strings.HasPrefix(name, "/") {
return nil, fmt.Errorf("invalid input path (not absolute)")
}
cleanPath := path.Clean(name) // remove possible trailing slashes
node, err := fs.find(cleanPath)
if err != nil { // might be ErrNotExist
return &File{}, err // TODO: nil instead?
}
// download file contents into obj.data
if err := node.cache(); err != nil {
return &File{}, err // TODO: nil instead?
}
//fi, err := node.Stat()
//if err != nil {
// return nil, err
//}
//if fi.IsDir() { // can we open a directory? - yes we can apparently
// return nil, fmt.Errorf("file is a directory")
//}
node.readOnly = true // as per docs, fileOpen opens files as read-only
node.closed = false // mark the file handle as open
return node, nil
}
// Close closes the file handle. This will try and run Sync automatically.
func (obj *File) Close() error {
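// a read-only handle can't have modified the contents, so don't bump ModTime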
if !obj.readOnly {
obj.ModTime = time.Now()
}
if err := obj.Sync(); err != nil {
return err
}
// FIXME: there is a big implementation mistake between the metadata
// node and the file handle, since they're currently sharing a struct!
// invalidate all of the fields
//obj.fs = nil
//obj.Path = ""
//obj.Mode = os.FileMode(0)
//obj.ModTime = time.Time{}
//obj.Children = nil
//obj.Hash = ""
//obj.data = nil
obj.cursor = 0
obj.readOnly = false
obj.closed = true
return nil
}
// Name returns the path of the file.
func (obj *File) Name() string {
return obj.Path
}
// Stat returns some information about the file.
func (obj *File) Stat() (os.FileInfo, error) {
// download file contents into obj.data
if err := obj.cache(); err != nil { // needed so Size() works correctly
return nil, err
}
return &FileInfo{ // everything is actually stored in the main file node
file: obj,
}, nil
}
// Sync flushes the file contents to the server and calls the filesystem
// metadata sync as well.
// FIXME: instead of a txn, run a get and then a put in two separate stages. if
// the get already found the data up there, then we don't need to push it all in
// the put phase. with the txn it is always all sent up even if the put is never
// needed. the get should just be a "key exists" test, and not a download of the
// whole file. if we *do* do the download, we can byte-by-byte check for hash
// collisions and panic if we find one :)
func (obj *File) Sync() error {
if obj.closed {
return ErrFileClosed
}
p := obj.path() // store file data at this path in etcd
//cmp := etcd.Compare(etcd.Version(p), "=", 0) // KeyMissing
cmp := etcdutil.KeyMissing(p)
op := etcd.OpPut(p, string(obj.data)) // this pushes contents to server
// it's important to do this in one transaction, and atomically, because
// this way, we only generate one watch event, and only when it's needed
result, err := obj.fs.txn([]etcd.Cmp{cmp}, []etcd.Op{op}, nil)
if err != nil {
return errwrap.Wrapf(err, "sync error with: %s (%s)", obj.Path, p)
}
if !result.Succeeded {
if obj.fs.Debug {
obj.fs.Logf("debug: data already exists in storage")
}
}
if err := obj.fs.sync(); err != nil { // push metadata up to server
return err
}
return nil
}
// Truncate trims the file to the requested size. Since our file system can only
// read and write data, but never edit existing data blocks, doing this will not
// cause more space to be available.
func (obj *File) Truncate(size int64) error {
if obj.closed {
return ErrFileClosed
}
if obj.readOnly {
return &os.PathError{Op: "truncate", Path: obj.Path, Err: ErrFileReadOnly}
}
if size < 0 {
return ErrOutOfRange
}
if size > 0 { // if size == 0, we don't need to run cache!
// download file contents into obj.data
if err := obj.cache(); err != nil {
return err
}
}
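// grow the file by padding with zero bytes, or shrink it to the new size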
if size > int64(len(obj.data)) {
diff := size - int64(len(obj.data))
obj.data = append(obj.data, bytes.Repeat([]byte{00}, int(diff))...)
} else {
obj.data = obj.data[0:size]
}
h, err := obj.fs.hash(obj.data) // update hash
if err != nil {
return err
}
obj.Hash = h
obj.ModTime = time.Now()
// this pushes the new data and metadata up to etcd
return obj.Sync()
}
// Read reads up to len(b) bytes from the File. It returns the number of bytes
// read and any error encountered. At end of file, Read returns 0, io.EOF.
// NOTE: This reads into the byte input. It's a side effect!
func (obj *File) Read(b []byte) (n int, err error) {
if obj.closed {
return 0, ErrFileClosed
}
if obj.Mode.IsDir() {
return 0, fmt.Errorf("file is a directory")
}
// download file contents into obj.data
if err := obj.cache(); err != nil {
return 0, err // TODO: -1 ?
}
// TODO: can we optimize by reading just the length from etcd, and also
// by only downloading the data range we're interested in?
if len(b) > 0 && int(obj.cursor) == len(obj.data) {
return 0, io.EOF
}
if int(obj.cursor) > len(obj.data) { // cursor got past the end (eg: via Seek)
return 0, io.ErrUnexpectedEOF // don't panic on the slice below
}
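// read the full buffer if possible, otherwise read whatever remains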
if len(obj.data)-int(obj.cursor) >= len(b) {
n = len(b)
} else {
n = len(obj.data) - int(obj.cursor)
}
copy(b, obj.data[obj.cursor:obj.cursor+int64(n)]) // store into input b
obj.cursor = obj.cursor + int64(n) // update cursor
return
}
// ReadAt reads len(b) bytes from the File starting at byte offset off. It
// returns the number of bytes read and the error, if any. ReadAt always returns
// a non-nil error when n < len(b). At end of file, that error is io.EOF.
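// NOTE: This implementation simply moves the shared cursor and calls Read.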
func (obj *File) ReadAt(b []byte, off int64) (n int, err error) {
obj.cursor = off
return obj.Read(b)
}
// Readdir lists the contents of the directory and returns a list of file info
// objects for each entry.
func (obj *File) Readdir(count int) ([]os.FileInfo, error) {
if !obj.Mode.IsDir() {
return nil, &os.PathError{Op: "readdir", Path: obj.Name(), Err: syscall.ENOTDIR}
}
children := obj.Children[obj.dirCursor:] // available children to output
var l = int64(len(children)) // initially assume to return them all
var err error
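// for count <= 0, return every remaining entry in a single list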
// for count > 0, if we return the last entry, also return io.EOF
if count > 0 {
l = int64(count) // initial assumption
if c := len(children); count >= c {
l = int64(c)
err = io.EOF // this result includes the last dir entry
}
}
obj.dirCursor += l // store our progress
output := make([]os.FileInfo, l)
// TODO: should this be sorted by "directory order"? what does that mean?
// from `man 3 readdir`: "unlikely that the names will be sorted"
for i := range output {
output[i] = &FileInfo{
file: children[i],
}
}
// we've seen the whole directory, so reset the cursor
if err == io.EOF || count <= 0 {
obj.dirCursor = 0 // TODO: is it okay to reset the cursor?
}
return output, err
}
// Readdirnames returns a list of the names in the current file handle's directory.
// TODO: this implementation shares the dirCursor with Readdir, is this okay?
// TODO: should Readdirnames even use a dirCursor at all?
func (obj *File) Readdirnames(n int) (names []string, _ error) {
fis, err := obj.Readdir(n)
if fis != nil {
for i, x := range fis {
if x != nil {
names = append(names, fis[i].Name())
}
}
}
return names, err
}
// Seek sets the offset for the next Read or Write on file to offset,
// interpreted according to whence: 0 means relative to the origin of the file,
// 1 means relative to the current offset, and 2 means relative to the end. It
// returns the new offset and an error, if any. The behavior of Seek on a file
// opened with O_APPEND is not specified.
func (obj *File) Seek(offset int64, whence int) (int64, error) {
if obj.closed {
return 0, ErrFileClosed
}
switch whence {
case io.SeekStart: // 0
obj.cursor = offset
case io.SeekCurrent: // 1
obj.cursor += offset
case io.SeekEnd: // 2
// download file contents into obj.data
if err := obj.cache(); err != nil {
return 0, err // TODO: -1 ?
}
obj.cursor = int64(len(obj.data)) + offset
}
return obj.cursor, nil
}
// Write writes to the given file.
func (obj *File) Write(b []byte) (n int, err error) {
if obj.closed {
return 0, ErrFileClosed
}
if obj.readOnly {
return 0, &os.PathError{Op: "write", Path: obj.Path, Err: ErrFileReadOnly}
}
// download file contents into obj.data
if err := obj.cache(); err != nil {
return 0, err // TODO: -1 ?
}
// calculate the write
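// cur is where the write starts, diff > 0 means the cursor is past the end of
// the existing data (the gap gets zero-filled), and tail is any existing data
// that extends past the end of the newly written region.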
n = len(b)
cur := obj.cursor
diff := cur - int64(len(obj.data))
var tail []byte
if n+int(cur) < len(obj.data) {
tail = obj.data[n+int(cur):]
}
if diff > 0 {
obj.data = append(obj.data, append(bytes.Repeat([]byte{00}, int(diff)), b...)...)
obj.data = append(obj.data, tail...)
} else {
obj.data = append(obj.data[:cur], b...)
obj.data = append(obj.data, tail...)
}
h, err := obj.fs.hash(obj.data) // update hash
if err != nil {
return 0, err // TODO: -1 ?
}
obj.Hash = h
obj.ModTime = time.Now()
// this pushes the new data and metadata up to etcd
if err := obj.Sync(); err != nil {
return 0, err // TODO: -1 ?
}
obj.cursor = int64(len(obj.data))
return
}
// WriteAt writes into the given file at a certain offset.
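// NOTE: This implementation simply moves the shared cursor and calls Write.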
func (obj *File) WriteAt(b []byte, off int64) (n int, err error) {
obj.cursor = off
return obj.Write(b)
}
// WriteString writes a string to the file.
func (obj *File) WriteString(s string) (n int, err error) {
return obj.Write([]byte(s))
}
// FileInfo is a struct which provides some information about a file handle.
type FileInfo struct {
file *File // private pointer to the actual file node
}
// Name returns the base name of the file.
func (obj *FileInfo) Name() string {
return obj.file.Name()
}
// Size returns the length in bytes.
func (obj *FileInfo) Size() int64 {
return int64(len(obj.file.data))
}
// Mode returns the file mode bits.
func (obj *FileInfo) Mode() os.FileMode {
return obj.file.Mode
}
// ModTime returns the modification time.
func (obj *FileInfo) ModTime() time.Time {
return obj.file.ModTime
}
// IsDir is an abbreviation for Mode().IsDir().
func (obj *FileInfo) IsDir() bool {
//return obj.file.Mode&os.ModeDir != 0
return obj.file.Mode.IsDir()
}
// Sys returns the underlying data source (can return nil).
func (obj *FileInfo) Sys() interface{} {
return nil // TODO: should we do something better?
//return obj.file.fs // TODO: would this work?
}