Files
mgmt/etcd/fs/file.go
James Shubin 336a38081a legal: Happy 2021 everyone...
Done with:

ack '2020+' -l | xargs sed -i -e 's/2020+/2021+/g'

Checked manually with:

git add -p

Hello to future James from 2022, and Happy Hacking!
2021-01-31 16:52:46 -05:00

540 lines
15 KiB
Go

// Mgmt
// Copyright (C) 2013-2021+ James Shubin and the project contributors
// Written by James Shubin <james@shubin.ca> and the project contributors
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package fs
import (
"bytes"
"encoding/gob"
"fmt"
"io"
"os"
"path"
"strings"
"syscall"
"time"
"github.com/purpleidea/mgmt/util/errwrap"
etcd "go.etcd.io/etcd/clientv3" // "clientv3"
etcdutil "go.etcd.io/etcd/clientv3/clientv3util"
)
// init registers the *File type with the gob package at package load time.
func init() {
	gob.Register(new(File))
}
// File represents a file node. This is the node of our tree structure. This is
// not thread safe, and you can have at most one open file handle at a time.
// The same struct currently serves as both the stored metadata node and the
// open file handle, which is why handle state (cursors, read-only and closed
// flags) lives next to the metadata. The unexported fields are private so that
// they don't get encoded.
type File struct {
	// FIXME: add a rwmutex to make this thread safe
	fs *Fs // pointer to file system
	Path string // relative path to file, trailing slash if it's a directory
	Mode os.FileMode // file mode bits; Mode.IsDir() marks a directory node
	ModTime time.Time // last modification time, set on Write, Truncate and Close
	//Size int64 // XXX: cache the size to avoid full file downloads for stat!
	Children []*File // dir's use this
	Hash string // string not []byte so it's readable, matches data
	data []byte // cache of the data. private so it doesn't get encoded
	cursor int64 // byte offset into data used by Read, Write and Seek
	dirCursor int64 // index of the next child that Readdir will return
	readOnly bool // is the file read-only?
	closed bool // is file closed?
}
// path builds the etcd key under which the contents of this file are stored.
// The key is made of the superblock's data prefix, the name of the hash type in
// curly braces (eg: {sha256}) and finally the content hash of the file. The
// hash type is part of the key so that different superblocks can share the same
// data prefix even when they use different hashes.
func (obj *File) path() string {
	prefix := obj.fs.sb.DataPrefix
	hashType := obj.fs.Hash
	return fmt.Sprintf("%s/{%s}%s", prefix, hashType, obj.Hash)
}
// cache makes sure that obj.data holds the contents which belong to obj.Hash.
// Directories have no contents and are skipped. If the hash of what we already
// hold matches obj.Hash, nothing is downloaded. Otherwise the data is fetched
// from etcd at obj.path() and stored in obj.data.
func (obj *File) cache() error {
	if obj.Mode.IsDir() {
		return nil // directories carry no data
	}

	sum, err := obj.fs.hash(obj.data) // hash of what we currently hold
	if err != nil {
		return err
	}
	if sum == obj.Hash {
		return nil // the cached data is already the correct data
	}

	key := obj.path()
	found, err := obj.fs.get(key) // download the file...
	if err != nil {
		return err
	}
	if len(found) == 0 {
		// NOTE(review): an empty result is not reported as an error, so
		// the stale obj.data is kept as is. Confirm this is intended.
		return nil
	}

	contents, ok := found[key]
	if !ok {
		return fmt.Errorf("could not find data") // programming error?
	}
	obj.data = contents // save
	return nil
}
// findNode looks through the direct children of this node for one whose Path
// equals name. The name must be the bare entry name and *not* an absolute path:
// to look for "bar" inside of a dir named "/tmp/foo/", pass in "bar", and not
// "/tmp/foo/bar". It returns the matching child and true, or nil and false if
// there is no such child.
func (obj *File) findNode(name string) (*File, bool) {
	for i := range obj.Children {
		if child := obj.Children[i]; child.Path == name {
			return child, true // found
		}
	}
	return nil, false // not found
}
// fileCreate creates the file named by the absolute path name inside of fs and
// returns it opened for writing. The parent must already exist and must be a
// directory. If the file already exists, it is truncated to zero length and
// returned, otherwise a new empty file is added to the parent and pushed up to
// the server with Sync.
//
// It returns an error if name is empty or not absolute, if the parent can't be
// found (which might be ErrNotExist), if the parent is not a directory (an
// *os.PathError wrapping syscall.ENOTDIR), or if truncating, hashing or syncing
// fails. Note that when the final Sync fails, the new file is still returned
// alongside the error.
func fileCreate(fs *Fs, name string) (*File, error) {
	if name == "" {
		return nil, fmt.Errorf("invalid input path")
	}
	if !strings.HasPrefix(name, "/") {
		return nil, fmt.Errorf("invalid input path (not absolute)")
	}
	cleanPath := path.Clean(name) // remove possible trailing slashes

	// try to add node to tree by first finding the parent node
	parentPath, filePath := path.Split(cleanPath) // looking for this
	node, err := fs.find(parentPath)
	if err != nil { // might be ErrNotExist
		return nil, err
	}
	fi, err := node.Stat()
	if err != nil {
		return nil, err
	}
	if !fi.IsDir() { // is the parent a suitable home?
		return nil, &os.PathError{Op: "create", Path: name, Err: syscall.ENOTDIR}
	}

	// does file already exist inside?
	if f, exists := node.findNode(filePath); exists { // overwrite!
		// The tree node is also the file handle, so it may still carry
		// the state of an earlier use: closed after a Close, or
		// read-only after a fileOpen. Truncate refuses to work in both
		// of those states, so hand out a fresh, writable handle which
		// starts at the beginning of the file.
		f.readOnly = false
		f.closed = false
		f.cursor = 0
		if err := f.Truncate(0); err != nil {
			return nil, err
		}
		return f, nil
	}

	data := []byte("")      // empty file contents
	h, err := fs.hash(data) // TODO: use memoized value?
	if err != nil {
		return &File{}, err // TODO: nil instead?
	}

	f := &File{
		fs:   fs,
		Path: filePath, // the relative path chunk (not incl. dir name)
		Hash: h,
		data: data,
	}

	// add to parent
	node.Children = append(node.Children, f)

	// push new file up if not on server, and then push up the metadata
	if err := f.Sync(); err != nil {
		return f, err // TODO: ok to return the file so user can run sync?
	}

	return f, nil
}
// fileOpen opens the file named by the absolute path name inside of fs for
// reading. The contents are downloaded into the node's cache before it is
// returned. Directories can be opened as well. It returns an error if name is
// empty or not absolute, if the node can't be found (which might be
// ErrNotExist) or if the download fails.
func fileOpen(fs *Fs, name string) (*File, error) {
	switch {
	case name == "":
		return nil, fmt.Errorf("invalid input path")
	case !strings.HasPrefix(name, "/"):
		return nil, fmt.Errorf("invalid input path (not absolute)")
	}

	// cleaning removes possible trailing slashes
	node, err := fs.find(path.Clean(name))
	if err != nil { // might be ErrNotExist
		return &File{}, err // TODO: nil instead?
	}

	// download file contents into node.data
	if err := node.cache(); err != nil {
		return &File{}, err // TODO: nil instead?
	}

	// We don't Stat the node and reject directories here, since opening a
	// directory is apparently allowed.

	// as per docs, fileOpen opens files as read-only, and since the node is
	// also the handle, it has to be marked as open (not closed) again
	node.readOnly, node.closed = true, false
	return node, nil
}
// Close closes the file handle. A Sync is run automatically first, and if that
// fails, the error is returned and the handle is left open. Handles which are
// not read-only get their modification time updated before the Sync.
func (obj *File) Close() error {
	if writable := !obj.readOnly; writable {
		obj.ModTime = time.Now()
	}

	err := obj.Sync()
	if err != nil {
		return err
	}

	// FIXME: there is a big implementation mistake between the metadata
	// node and the file handle, since they're currently sharing a struct!
	// Because of that, we can't invalidate the metadata fields here (fs,
	// Path, Mode, ModTime, Children, Hash, data) and we only reset the
	// state which belongs to the handle.
	obj.cursor, obj.readOnly, obj.closed = 0, false, true
	return nil
}
// Name returns the Path field of the file, which is the path relative to its
// parent directory.
func (obj *File) Name() string {
	name := obj.Path
	return name
}
// Stat returns a FileInfo describing this file. The file contents are pulled
// into the cache first, because the size that the FileInfo reports is taken
// from the length of the cached data.
func (obj *File) Stat() (os.FileInfo, error) {
	err := obj.cache() // needed so Size() works correctly
	if err != nil {
		return nil, err
	}

	// everything is actually stored in the main file node
	info := &FileInfo{file: obj}
	return info, nil
}
// Sync pushes the file contents up to the server if they are not stored there
// yet, and then runs the metadata sync of the file system. It returns
// ErrFileClosed if the handle is closed.
// FIXME: instead of a txn, run a get and then a put in two separate stages. if
// the get already found the data up there, then we don't need to push it all in
// the put phase. with the txn it is always all sent up even if the put is never
// needed. the get should just be a "key exists" test, and not a download of the
// whole file. if we *do* do the download, we can byte-by-byte check for hash
// collisions and panic if we find one :)
func (obj *File) Sync() error {
	if obj.closed {
		return ErrFileClosed
	}

	key := obj.path() // store file data at this path in etcd

	// Only put the data if the key is missing. It's important to do this in
	// one transaction, and atomically, because this way, we only generate
	// one watch event, and only when it's needed.
	ifs := []etcd.Cmp{etcdutil.KeyMissing(key)}
	thn := []etcd.Op{etcd.OpPut(key, string(obj.data))} // pushes contents
	result, err := obj.fs.txn(ifs, thn, nil)
	if err != nil {
		return errwrap.Wrapf(err, "sync error with: %s (%s)", obj.Path, key)
	}
	if !result.Succeeded && obj.fs.Debug {
		obj.fs.Logf("debug: data already exists in storage")
	}

	return obj.fs.sync() // push metadata up to server
}
// Truncate changes the length of the file to size. A file which is shorter than
// size is extended with zero bytes. Since our file system can only read and
// write data, but never edit existing data blocks, shrinking a file will not
// cause more space to be available. The new contents and metadata are pushed up
// with Sync. It returns ErrFileClosed for a closed handle, an *os.PathError
// wrapping ErrFileReadOnly for a read-only handle, and ErrOutOfRange for a
// negative size.
func (obj *File) Truncate(size int64) error {
	switch {
	case obj.closed:
		return ErrFileClosed
	case obj.readOnly:
		return &os.PathError{Op: "truncate", Path: obj.Path, Err: ErrFileReadOnly}
	case size < 0:
		return ErrOutOfRange
	}

	// truncating to zero throws everything away, so the current contents
	// only need to be downloaded when some of them are kept
	if size != 0 {
		if err := obj.cache(); err != nil {
			return err
		}
	}

	if current := int64(len(obj.data)); size <= current {
		obj.data = obj.data[:size]
	} else {
		padding := bytes.Repeat([]byte{0}, int(size-current))
		obj.data = append(obj.data, padding...)
	}

	sum, err := obj.fs.hash(obj.data) // update hash
	if err != nil {
		return err
	}
	obj.Hash = sum
	obj.ModTime = time.Now()

	// this pushes the new data and metadata up to etcd
	return obj.Sync()
}
// Read reads up to len(b) bytes from the File, starting at the current cursor,
// and advances the cursor by the number of bytes read. It returns the number of
// bytes read and any error encountered. At end of file, which includes a cursor
// that was moved beyond the end of the data, Read returns 0, io.EOF. A read
// into an empty b returns 0, nil.
// NOTE: This reads into the byte input. It's a side effect!
//
// It returns ErrFileClosed for a closed handle, an error for directories, any
// error from downloading the contents, and an *os.PathError wrapping
// syscall.EINVAL if the cursor is negative.
func (obj *File) Read(b []byte) (n int, err error) {
	if obj.closed {
		return 0, ErrFileClosed
	}
	if obj.Mode.IsDir() {
		return 0, fmt.Errorf("file is a directory")
	}

	// download file contents into obj.data
	if err := obj.cache(); err != nil {
		return 0, err // TODO: -1 ?
	}
	// TODO: can we optimize by reading just the length from etcd, and also
	// by only downloading the data range we're interested in?

	// a negative cursor can't be used as a slice index
	if obj.cursor < 0 {
		return 0, &os.PathError{Op: "read", Path: obj.Path, Err: syscall.EINVAL}
	}
	if len(b) == 0 {
		return 0, nil // nothing was asked for
	}
	// The cursor can legitimately sit past the end of the data after a
	// Seek, so treat that the same as being exactly at the end.
	if obj.cursor >= int64(len(obj.data)) {
		return 0, io.EOF
	}

	n = copy(b, obj.data[obj.cursor:]) // copies min(len(b), remaining)
	obj.cursor += int64(n)             // update cursor
	return n, nil
}
// ReadAt reads len(b) bytes from the File starting at byte offset off. It
// returns the number of bytes read and the error, if any. ReadAt always returns
// a non-nil error when n < len(b). At end of file, that error is io.EOF. A
// negative offset is rejected with an *os.PathError wrapping syscall.EINVAL.
// NOTE: This is implemented by moving the cursor to off and then calling Read,
// so it changes the position which is used by the next Read or Write.
func (obj *File) ReadAt(b []byte, off int64) (n int, err error) {
	if off < 0 {
		return 0, &os.PathError{Op: "readat", Path: obj.Path, Err: syscall.EINVAL}
	}

	obj.cursor = off
	n, err = obj.Read(b)
	// Read is allowed to return fewer bytes without an error, but ReadAt
	// has to explain every short read.
	if err == nil && n < len(b) {
		err = io.EOF
	}
	return n, err
}
// Readdir lists the contents of the directory and returns a list of file info
// objects for the entries, continuing from where the previous call stopped. If
// count is zero or negative, then all of the remaining entries are returned
// with a nil error. If count is positive, then at most count entries are
// returned, and io.EOF is returned together with the batch that contains the
// last entry. Once the end has been reached, the position is reset to the
// start of the directory. It returns an *os.PathError wrapping syscall.ENOTDIR
// if the file is not a directory.
func (obj *File) Readdir(count int) ([]os.FileInfo, error) {
	if !obj.Mode.IsDir() {
		return nil, &os.PathError{Op: "readdir", Path: obj.Name(), Err: syscall.ENOTDIR}
	}

	remaining := obj.Children[obj.dirCursor:] // available children to output
	take := len(remaining)                    // initially assume to return them all
	var err error
	switch {
	case count <= 0:
		// everything that is left is returned, without an io.EOF
	case count >= len(remaining):
		err = io.EOF // this result includes the last dir entry
	default:
		take = count
	}

	// TODO: should this be sorted by "directory order" what does that mean?
	// from `man 3 readdir`: "unlikely that the names will be sorted"
	infos := make([]os.FileInfo, 0, take)
	for _, child := range remaining[:take] {
		infos = append(infos, &FileInfo{file: child})
	}

	if count <= 0 || err == io.EOF {
		// we've seen the whole directory, so reset the cursor
		obj.dirCursor = 0 // TODO: is it okay to reset the cursor?
	} else {
		obj.dirCursor += int64(take) // store our progress
	}

	return infos, err
}
// Readdirnames returns a list of the names in the current file handle's
// directory. It runs Readdir with the same n, and passes on its error.
// TODO: this implementation shares the dirCursor with Readdir, is this okay?
// TODO: should Readdirnames even use a dirCursor at all?
func (obj *File) Readdirnames(n int) (names []string, _ error) {
	infos, err := obj.Readdir(n)
	for _, info := range infos { // ranging over a nil list is a no-op
		if info == nil {
			continue
		}
		names = append(names, info.Name())
	}
	return names, err
}
// Seek sets the offset for the next Read or Write on file to offset,
// interpreted according to whence: 0 means relative to the origin of the file,
// 1 means relative to the current offset, and 2 means relative to the end. It
// returns the new offset and an error, if any. Seeking to a position past the
// end of the file is allowed. The behavior of Seek on a file opened with
// O_APPEND is not specified.
//
// It returns ErrFileClosed for a closed handle, any error from downloading the
// contents (needed for whence 2 only), and an *os.PathError wrapping
// syscall.EINVAL for an unknown whence or for a resulting offset which would be
// before the start of the file. The cursor is left untouched on error.
func (obj *File) Seek(offset int64, whence int) (int64, error) {
	if obj.closed {
		return 0, ErrFileClosed
	}

	var base int64
	switch whence {
	case io.SeekStart: // 0
		base = 0
	case io.SeekCurrent: // 1
		base = obj.cursor
	case io.SeekEnd: // 2
		// download file contents into obj.data
		if err := obj.cache(); err != nil {
			return 0, err // TODO: -1 ?
		}
		base = int64(len(obj.data))
	default:
		// don't silently report success for a whence we don't know
		return 0, &os.PathError{Op: "seek", Path: obj.Path, Err: syscall.EINVAL}
	}

	// a negative cursor would be used as a slice index by Read and Write
	pos := base + offset
	if pos < 0 {
		return 0, &os.PathError{Op: "seek", Path: obj.Path, Err: syscall.EINVAL}
	}

	obj.cursor = pos
	return pos, nil
}
// Write writes len(b) bytes from b into the file, starting at the current
// cursor, and then pushes the new contents and metadata up with Sync. Bytes
// which are already stored in the written range are overwritten, and the
// contents before and after that range are kept. If the cursor is beyond the
// end of the file, the gap is filled with zero bytes. On success the cursor is
// advanced by len(b) and len(b) is returned.
//
// It returns ErrFileClosed for a closed handle, an *os.PathError wrapping
// ErrFileReadOnly for a read-only handle, an *os.PathError wrapping
// syscall.EINVAL if the cursor is negative, and any error from downloading,
// hashing or syncing the contents. In all of those cases it returns n == 0 and
// the cursor is not moved. If it was the Sync that failed, then the in-memory
// contents and hash have already been updated.
func (obj *File) Write(b []byte) (n int, err error) {
	if obj.closed {
		return 0, ErrFileClosed
	}
	if obj.readOnly {
		return 0, &os.PathError{Op: "write", Path: obj.Path, Err: ErrFileReadOnly}
	}

	// download file contents into obj.data
	if err := obj.cache(); err != nil {
		return 0, err // TODO: -1 ?
	}

	// a negative cursor can't be used as a slice index
	if obj.cursor < 0 {
		return 0, &os.PathError{Op: "write", Path: obj.Path, Err: syscall.EINVAL}
	}

	// calculate the write
	start := obj.cursor
	end := start + int64(len(b))
	size := int64(len(obj.data))

	// Build the result in a new buffer, so that obj.data is only replaced
	// once the hash was computed successfully. An empty write doesn't
	// change the contents, not even if the cursor is past the end.
	updated := obj.data
	if len(b) > 0 {
		total := size
		if end > total {
			total = end
		}
		updated = make([]byte, 0, total)
		if start < size {
			updated = append(updated, obj.data[:start]...) // keep the head
		} else {
			// keep everything we have, and zero fill the gap (if any)
			updated = append(updated, obj.data...)
			updated = append(updated, bytes.Repeat([]byte{0}, int(start-size))...)
		}
		updated = append(updated, b...)
		if end < size {
			updated = append(updated, obj.data[end:]...) // keep the tail
		}
	}

	h, err := obj.fs.hash(updated) // update hash
	if err != nil {
		return 0, err // TODO: -1 ?
	}
	obj.data = updated
	obj.Hash = h
	obj.ModTime = time.Now()

	// this pushes the new data and metadata up to etcd
	if err := obj.Sync(); err != nil {
		return 0, err // TODO: -1 ?
	}

	// continue right after what we wrote, which is not necessarily the end
	obj.cursor = end
	return len(b), nil
}
// WriteAt writes len(b) bytes from b into the file, starting at byte offset
// off. It returns the number of bytes written and an error, if any. A negative
// offset is rejected with an *os.PathError wrapping syscall.EINVAL.
// NOTE: This is implemented by moving the cursor to off and then calling Write,
// so it changes the position which is used by the next Read or Write.
func (obj *File) WriteAt(b []byte, off int64) (n int, err error) {
	// Write uses the cursor as a slice index, so a negative one can't work
	if off < 0 {
		return 0, &os.PathError{Op: "writeat", Path: obj.Path, Err: syscall.EINVAL}
	}

	obj.cursor = off
	return obj.Write(b)
}
// WriteString writes the bytes of the string s to the file by passing them to
// Write, and returns what Write returns.
func (obj *File) WriteString(s string) (n int, err error) {
	data := []byte(s)
	return obj.Write(data)
}
// FileInfo is a struct which provides some information about a file handle. It
// stores nothing of its own: its Name, Size, Mode, ModTime and IsDir methods
// read their answers from the file node that it points to.
type FileInfo struct {
	file *File // pointer to the actual file node which backs this info
}
// Name returns the name of the file, as reported by the Name method of the
// underlying file node.
func (obj *FileInfo) Name() string {
	node := obj.file
	return node.Name()
}
// Size returns the length in bytes of the data which is currently cached in the
// underlying file node.
func (obj *FileInfo) Size() int64 {
	length := len(obj.file.data)
	return int64(length)
}
// Mode returns the file mode bits of the underlying file node.
func (obj *FileInfo) Mode() os.FileMode {
	mode := obj.file.Mode
	return mode
}
// ModTime returns the modification time of the underlying file node.
func (obj *FileInfo) ModTime() time.Time {
	modified := obj.file.ModTime
	return modified
}
// IsDir reports whether the underlying file node is a directory. It is an
// abbreviation for Mode().IsDir().
func (obj *FileInfo) IsDir() bool {
	return obj.Mode().IsDir()
}
// Sys returns the underlying data source. This implementation has nothing to
// offer here, so it always returns nil.
func (obj *FileInfo) Sys() interface{} {
	// TODO: should we do something better?
	// TODO: would returning obj.file.fs work?
	return nil
}