diff options
Diffstat (limited to 'filesystem/mountpoint.go')
| -rw-r--r-- | filesystem/mountpoint.go | 596 |
1 files changed, 454 insertions, 142 deletions
diff --git a/filesystem/mountpoint.go b/filesystem/mountpoint.go index 12016dd..9be54a4 100644 --- a/filesystem/mountpoint.go +++ b/filesystem/mountpoint.go @@ -22,24 +22,14 @@ package filesystem import ( - "io/ioutil" - "os" -) - -/* -#include <mntent.h> // setmntent, getmntent, endmntent - -// The file containing mountpoints info and how we should read it -const char* mountpoints_filename = "/proc/mounts"; -const char* read_mode = "r"; -*/ -import "C" - -import ( + "bufio" "fmt" + "io" "log" + "os" "path/filepath" "sort" + "strconv" "strings" "sync" @@ -47,86 +37,326 @@ import ( ) var ( - // These maps hold data about the state of the system's mountpoints. + // These maps hold data about the state of the system's filesystems. + // + // They only contain one Mount per filesystem, even if there are + // additional bind mounts, since we want to store fscrypt metadata in + // only one place per filesystem. When it is ambiguous which Mount + // should be used for a filesystem, mountsByDevice will contain an + // explicit nil entry, and mountsByPath won't contain an entry. + mountsByDevice map[DeviceNumber]*Mount mountsByPath map[string]*Mount - mountsByDevice map[string][]*Mount // Used to make the mount functions thread safe mountMutex sync.Mutex // True if the maps have been successfully initialized. mountsInitialized bool // Supported tokens for filesystem links uuidToken = "UUID" + pathToken = "PATH" // Location to perform UUID lookup uuidDirectory = "/dev/disk/by-uuid" ) -// getMountInfo populates the Mount mappings by parsing the filesystem -// description file using the getmntent functions. Returns ErrBadLoad if the -// Mount mappings cannot be populated. -func getMountInfo() error { - if mountsInitialized { - return nil +// Unescape octal-encoded escape sequences in a string from the mountinfo file. +// The kernel encodes the ' ', '\t', '\n', and '\\' bytes this way. This +// function exactly inverts what the kernel does, including by preserving +// invalid UTF-8. +func unescapeString(str string) string { + var sb strings.Builder + for i := 0; i < len(str); i++ { + b := str[i] + if b == '\\' && i+3 < len(str) { + if parsed, err := strconv.ParseInt(str[i+1:i+4], 8, 8); err == nil { + b = uint8(parsed) + i += 3 + } + } + sb.WriteByte(b) } + return sb.String() +} - // make new maps - mountsByPath = make(map[string]*Mount) - mountsByDevice = make(map[string][]*Mount) +// EscapeString is the reverse of unescapeString. Use this to avoid injecting +// spaces or newlines into output that uses these characters as separators. +func EscapeString(str string) string { + var sb strings.Builder + for _, b := range []byte(str) { + switch b { + case ' ', '\t', '\n', '\\': + sb.WriteString(fmt.Sprintf("\\%03o", b)) + default: + sb.WriteByte(b) + } + } + return sb.String() +} - // Load the mount information from mountpoints_filename - fileHandle := C.setmntent(C.mountpoints_filename, C.read_mode) - if fileHandle == nil { - return errors.Wrapf(ErrGlobalMountInfo, "could not read %q", - C.GoString(C.mountpoints_filename)) +func getDeviceName(num DeviceNumber, mountSource string) string { + // When possible, get the device name via the device number rather than + // use the mount source field directly. This is necessary to handle a + // rootfs that was mounted via the kernel command line, since mountinfo + // always shows /dev/root for that. + linkPath := fmt.Sprintf("/sys/dev/block/%v", num) + if target, err := os.Readlink(linkPath); err == nil { + derivedDeviceName := fmt.Sprintf("/dev/%s", filepath.Base(target)) + if _, err := os.Stat(derivedDeviceName); err == nil { + return derivedDeviceName + } + } + // Sysfs is not mounted or is incomplete, or the device nodes are not in + // the standard location. Fall back to using the mount source field if + // it looks like a path. + if strings.HasPrefix(mountSource, "/") { + return mountSource } - defer C.endmntent(fileHandle) + return "" +} - for { - entry := C.getmntent(fileHandle) - // When getmntent returns nil, we have read all of the entries. - if entry == nil { - mountsInitialized = true +// Parse one line of /proc/self/mountinfo. +// +// The line contains the following space-separated fields: +// +// [0] mount ID +// [1] parent ID +// [2] major:minor +// [3] root +// [4] mount point +// [5] mount options +// [6...n-1] optional field(s) +// [n] separator +// [n+1] filesystem type +// [n+2] mount source +// [n+3] super options +// +// For more details, see https://www.kernel.org/doc/Documentation/filesystems/proc.txt +func parseMountInfoLine(line string) *Mount { + fields := strings.Split(line, " ") + if len(fields) < 10 { + return nil + } + + // Count the optional fields. In case new fields are appended later, + // don't simply assume that n == len(fields) - 4. + n := 6 + for fields[n] != "-" { + n++ + if n >= len(fields) { return nil } + } + if n+3 >= len(fields) { + return nil + } + + var mnt *Mount = &Mount{} + var err error + mnt.DeviceNumber, err = newDeviceNumberFromString(fields[2]) + if err != nil { + return nil + } + mnt.Subtree = unescapeString(fields[3]) + mnt.Path = unescapeString(fields[4]) + for _, opt := range strings.Split(fields[5], ",") { + if opt == "ro" { + mnt.ReadOnly = true + } + } + mnt.FilesystemType = unescapeString(fields[n+1]) + mnt.Device = getDeviceName(mnt.DeviceNumber, unescapeString(fields[n+2])) + return mnt +} + +type mountpointTreeNode struct { + mount *Mount + parent *mountpointTreeNode + children []*mountpointTreeNode +} - // Create the Mount structure by converting types. - mnt := Mount{ - Path: C.GoString(entry.mnt_dir), - Filesystem: C.GoString(entry.mnt_type), - Options: strings.Split(C.GoString(entry.mnt_opts), ","), +func addUncontainedSubtreesRecursive(dst map[string]bool, + node *mountpointTreeNode, allUncontainedSubtrees map[string]bool) { + if allUncontainedSubtrees[node.mount.Subtree] { + dst[node.mount.Subtree] = true + } + for _, child := range node.children { + addUncontainedSubtreesRecursive(dst, child, allUncontainedSubtrees) + } +} + +// findMainMount finds the "main" Mount of a filesystem. The "main" Mount is +// where the filesystem's fscrypt metadata is stored. +// +// Normally, there is just one Mount and it's of the entire filesystem +// (mnt.Subtree == "/"). But in general, the filesystem might be mounted in +// multiple places, including "bind mounts" where mnt.Subtree != "/". Also, the +// filesystem might have a combination of read-write and read-only mounts. +// +// To handle most cases, we could just choose a mount with mnt.Subtree == "/", +// preferably a read-write mount. However, that doesn't work in containers +// where the "/" subtree might not be mounted. Here's a real-world example: +// +// mnt.Subtree mnt.Path +// ----------- -------- +// /var/lib/lxc/base/rootfs / +// /var/cache/pacman/pkg /var/cache/pacman/pkg +// /srv/repo/x86_64 /srv/http/x86_64 +// +// In this case, all mnt.Subtree are independent. To handle this case, we must +// choose the Mount whose mnt.Path contains the others, i.e. the first one. +// Note: the fscrypt metadata won't be usable from outside the container since +// it won't be at the real root of the filesystem, but that may be acceptable. +// +// However, we can't look *only* at mnt.Path, since in some cases mnt.Subtree is +// needed to correctly handle bind mounts. For example, in the following case, +// the first Mount should be chosen: +// +// mnt.Subtree mnt.Path +// ----------- -------- +// /foo /foo +// /foo/dir /dir +// +// To solve this, we divide the mounts into non-overlapping trees of mnt.Path. +// Then, we choose one of these trees which contains (exactly or via path +// prefix) *all* mnt.Subtree. We then return the root of this tree. In both +// the above examples, this algorithm returns the first Mount. +func findMainMount(filesystemMounts []*Mount) *Mount { + // Index this filesystem's mounts by path. Note: paths are unique here, + // since non-last mounts were already excluded earlier. + // + // Also build the set of all mounted subtrees. + filesystemMountsByPath := make(map[string]*mountpointTreeNode) + allSubtrees := make(map[string]bool) + for _, mnt := range filesystemMounts { + filesystemMountsByPath[mnt.Path] = &mountpointTreeNode{mount: mnt} + allSubtrees[mnt.Subtree] = true + } + + // Divide the mounts into non-overlapping trees of mountpoints. + for path, mntNode := range filesystemMountsByPath { + for path != "/" && mntNode.parent == nil { + path = filepath.Dir(path) + if parent := filesystemMountsByPath[path]; parent != nil { + mntNode.parent = parent + parent.children = append(parent.children, mntNode) + } } + } - // Skip invalid mountpoints - var err error - if mnt.Path, err = cannonicalizePath(mnt.Path); err != nil { - log.Printf("getting mnt_dir: %v", err) + // Build the set of mounted subtrees that aren't contained in any other + // mounted subtree. + allUncontainedSubtrees := make(map[string]bool) + for subtree := range allSubtrees { + contained := false + for t := subtree; t != "/" && !contained; { + t = filepath.Dir(t) + contained = allSubtrees[t] + } + if !contained { + allUncontainedSubtrees[subtree] = true + } + } + + // Select the root of a mountpoint tree whose mounted subtrees contain + // *all* mounted subtrees. Equivalently, select a mountpoint tree in + // which every uncontained subtree is mounted. + var mainMount *Mount + for _, mntNode := range filesystemMountsByPath { + mnt := mntNode.mount + if mntNode.parent != nil { + continue + } + uncontainedSubtrees := make(map[string]bool) + addUncontainedSubtreesRecursive(uncontainedSubtrees, mntNode, allUncontainedSubtrees) + if len(uncontainedSubtrees) != len(allUncontainedSubtrees) { + continue + } + // If there's more than one eligible mount, they should have the + // same Subtree. Otherwise it's ambiguous which one to use. + if mainMount != nil && mainMount.Subtree != mnt.Subtree { + log.Printf("Unsupported case: %q (%v) has multiple non-overlapping mounts. This filesystem will be ignored!", + mnt.Device, mnt.DeviceNumber) + return nil + } + // Prefer a read-write mount to a read-only one. + if mainMount == nil || mainMount.ReadOnly { + mainMount = mnt + } + } + return mainMount +} + +// This is separate from loadMountInfo() only for unit testing. +func readMountInfo(r io.Reader) error { + mountsByDevice = make(map[DeviceNumber]*Mount) + mountsByPath = make(map[string]*Mount) + allMountsByDevice := make(map[DeviceNumber][]*Mount) + allMountsByPath := make(map[string]*Mount) + + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := scanner.Text() + mnt := parseMountInfoLine(line) + if mnt == nil { + log.Printf("ignoring invalid mountinfo line %q", line) continue } + // We can only use mountpoints that are directories for fscrypt. if !isDir(mnt.Path) { - log.Printf("mnt_dir %v: not a directory", mnt.Path) + log.Printf("ignoring mountpoint %q because it is not a directory", mnt.Path) continue } // Note this overrides the info if we have seen the mountpoint // earlier in the file. This is correct behavior because the - // filesystems are listed in mount order. - mountsByPath[mnt.Path] = &mnt - - deviceName, err := cannonicalizePath(C.GoString(entry.mnt_fsname)) - // Only use real valid devices (unlike cgroups, tmpfs, ...) - if err == nil && isDevice(deviceName) { - mnt.Device = deviceName - mountsByDevice[deviceName] = append(mountsByDevice[deviceName], &mnt) + // mountpoints are listed in mount order. + allMountsByPath[mnt.Path] = mnt + } + // For each filesystem, choose a "main" Mount and discard any additional + // bind mounts. fscrypt only cares about the main Mount, since it's + // where the fscrypt metadata is stored. Store all the main Mounts in + // mountsByDevice and mountsByPath so that they can be found later. + for _, mnt := range allMountsByPath { + allMountsByDevice[mnt.DeviceNumber] = + append(allMountsByDevice[mnt.DeviceNumber], mnt) + } + for deviceNumber, filesystemMounts := range allMountsByDevice { + mnt := findMainMount(filesystemMounts) + mountsByDevice[deviceNumber] = mnt // may store an explicit nil entry + if mnt != nil { + mountsByPath[mnt.Path] = mnt + } + } + return nil +} + +// loadMountInfo populates the Mount mappings by parsing /proc/self/mountinfo. +// It returns an error if the Mount mappings cannot be populated. +func loadMountInfo() error { + if !mountsInitialized { + file, err := os.Open("/proc/self/mountinfo") + if err != nil { + return err } + defer file.Close() + if err := readMountInfo(file); err != nil { + return err + } + mountsInitialized = true } + return nil } -// AllFilesystems lists all the Mounts on the current system ordered by path. -// Use CheckSetup() to see if they are used with fscrypt. +func filesystemLacksMainMountError(deviceNumber DeviceNumber) error { + return errors.Errorf("Device %q (%v) lacks a \"main\" mountpoint in the current mount namespace, so it's ambiguous where to store the fscrypt metadata.", + getDeviceName(deviceNumber, ""), deviceNumber) +} + +// AllFilesystems lists all mounted filesystems ordered by path to their "main" +// Mount. Use CheckSetup() to see if they are set up for use with fscrypt. func AllFilesystems() ([]*Mount, error) { mountMutex.Lock() defer mountMutex.Unlock() - if err := getMountInfo(); err != nil { + if err := loadMountInfo(); err != nil { return nil, err } @@ -145,135 +375,217 @@ func UpdateMountInfo() error { mountMutex.Lock() defer mountMutex.Unlock() mountsInitialized = false - return getMountInfo() + return loadMountInfo() } -// FindMount returns the corresponding Mount object for some path in a -// filesystem. Note that in the case of a bind mounts there may be two Mount -// objects for the same underlying filesystem. An error is returned if the path -// is invalid or we cannot load the required mount data. If a filesystem has -// been updated since the last call to one of the mount functions, run -// UpdateMountInfo to see changes. +// FindMount returns the main Mount object for the filesystem which contains the +// file at the specified path. An error is returned if the path is invalid or if +// we cannot load the required mount data. If a mount has been updated since the +// last call to one of the mount functions, run UpdateMountInfo to see changes. func FindMount(path string) (*Mount, error) { - path, err := cannonicalizePath(path) + mountMutex.Lock() + defer mountMutex.Unlock() + if err := loadMountInfo(); err != nil { + return nil, err + } + // First try to find the mount by the number of the containing device. + deviceNumber, err := getNumberOfContainingDevice(path) if err != nil { return nil, err } - - mountMutex.Lock() - defer mountMutex.Unlock() - if err = getMountInfo(); err != nil { + mnt, ok := mountsByDevice[deviceNumber] + if ok { + if mnt == nil { + return nil, filesystemLacksMainMountError(deviceNumber) + } + return mnt, nil + } + // The mount couldn't be found by the number of the containing device. + // Fall back to walking up the directory hierarchy and checking for a + // mount at each directory path. This is necessary for btrfs, where + // files report a different st_dev from the /proc/self/mountinfo entry. + curPath, err := canonicalizePath(path) + if err != nil { return nil, err } - - // Traverse up the directory tree until we find a mountpoint for { - if mnt, ok := mountsByPath[path]; ok { + mnt := mountsByPath[curPath] + if mnt != nil { return mnt, nil } - // Move to the parent directory unless we have reached the root. - parent := filepath.Dir(path) - if parent == path { - return nil, errors.Wrap(ErrNotAMountpoint, path) + parent := filepath.Dir(curPath) + if parent == curPath { + return nil, errors.Errorf("couldn't find mountpoint containing %q", path) } - path = parent + curPath = parent } } -// GetMount returns the Mount object with a matching mountpoint. An error is -// returned if the path is invalid or we cannot load the required mount data. If -// a filesystem has been updated since the last call to one of the mount -// functions, run UpdateMountInfo to see changes. +// GetMount is like FindMount, except GetMount also returns an error if the path +// doesn't name the same file as the filesystem's "main" Mount. For example, if +// a filesystem is fully mounted at "/mnt" and if "/mnt/a" exists, then +// FindMount("/mnt/a") will succeed whereas GetMount("/mnt/a") will fail. This +// is true even if "/mnt/a" is a bind mount of part of the same filesystem. func GetMount(mountpoint string) (*Mount, error) { - mountpoint, err := cannonicalizePath(mountpoint) + mnt, err := FindMount(mountpoint) + if err != nil { + return nil, &ErrNotAMountpoint{mountpoint} + } + // Check whether 'mountpoint' names the same directory as 'mnt.Path'. + // Use os.SameFile() (i.e., compare inode numbers) rather than compare + // canonical paths, since filesystems may be mounted in multiple places. + fi1, err := os.Stat(mountpoint) if err != nil { return nil, err } - - mountMutex.Lock() - defer mountMutex.Unlock() - if err = getMountInfo(); err != nil { + fi2, err := os.Stat(mnt.Path) + if err != nil { return nil, err } - - if mnt, ok := mountsByPath[mountpoint]; ok { - return mnt, nil + if !os.SameFile(fi1, fi2) { + return nil, &ErrNotAMountpoint{mountpoint} } - - return nil, errors.Wrap(ErrNotAMountpoint, mountpoint) + return mnt, nil } -// getMountsFromLink returns the Mount objects which match the provided link. -// This link if formatted as a tag (e.g. <token>=<value>) similar to how they -// apprear in "/etc/fstab". Currently, only "UUID" tokens are supported. Note -// that this can match multiple Mounts (due to the existance of bind mounts). An -// error is returned if the link is invalid or we cannot load the required mount -// data. If a filesystem has been updated since the last call to one of the -// mount functions, run UpdateMountInfo to see the change. -func getMountsFromLink(link string) ([]*Mount, error) { - // Parse the link - linkComponents := strings.Split(link, "=") - if len(linkComponents) != 2 { - return nil, errors.Wrapf(ErrFollowLink, "link %q format in invalid", link) - } - token := linkComponents[0] - value := linkComponents[1] - if token != uuidToken { - return nil, errors.Wrapf(ErrFollowLink, "token type %q not supported", token) - } - - // See if UUID points to an existing device - searchPath := filepath.Join(uuidDirectory, value) - if filepath.Base(searchPath) != value { - return nil, errors.Wrapf(ErrFollowLink, "value %q is not a UUID", value) - } - devicePath, err := cannonicalizePath(searchPath) - if err != nil { - return nil, errors.Wrapf(ErrFollowLink, "no device with UUID %q", value) - } +func uuidToDeviceNumber(uuid string) (DeviceNumber, error) { + uuidSymlinkPath := filepath.Join(uuidDirectory, uuid) + return getDeviceNumber(uuidSymlinkPath) +} - // Lookup mountpoints for device in global store +func deviceNumberToMount(deviceNumber DeviceNumber) (*Mount, bool) { mountMutex.Lock() defer mountMutex.Unlock() - if err := getMountInfo(); err != nil { - return nil, err - } - mnts, ok := mountsByDevice[devicePath] - if !ok { - return nil, errors.Wrapf(ErrFollowLink, "no mounts for device %q", devicePath) + if err := loadMountInfo(); err != nil { + log.Print(err) + return nil, false } - return mnts, nil + mnt, ok := mountsByDevice[deviceNumber] + return mnt, ok } -// makeLink returns a link of the form <token>=<value> where value is the tag -// value for the Mount's device. Currently, only "UUID" tokens are supported. An -// error is returned if the mount has no device, or no UUID. -func makeLink(mnt *Mount, token string) (string, error) { - if token != uuidToken { - return "", errors.Wrapf(ErrMakeLink, "token type %q not supported", token) +// getMountFromLink returns the main Mount, if any, for the filesystem which the +// given link points to. The link should contain a series of token-value pairs +// (<token>=<value>), one per line. The supported tokens are "UUID" and "PATH". +// If the UUID is present and it works, then it is used; otherwise, PATH is used +// if it is present. (The fallback from UUID to PATH will keep the link working +// if the UUID of the target filesystem changes but its mountpoint doesn't.) +// +// If a mount has been updated since the last call to one of the mount +// functions, make sure to run UpdateMountInfo first. +func getMountFromLink(link string) (*Mount, error) { + // Parse the link. + uuid := "" + path := "" + lines := strings.Split(link, "\n") + for _, line := range lines { + line := strings.TrimSpace(line) + if line == "" { + continue + } + pair := strings.Split(line, "=") + if len(pair) != 2 { + log.Printf("ignoring invalid line in filesystem link file: %q", line) + continue + } + token := pair[0] + value := pair[1] + switch token { + case uuidToken: + uuid = value + case pathToken: + path = value + default: + log.Printf("ignoring unknown link token %q", token) + } + } + // At least one of UUID and PATH must be present. + if uuid == "" && path == "" { + return nil, &ErrFollowLink{link, errors.Errorf("invalid filesystem link file")} + } + + // Try following the UUID. + errMsg := "" + if uuid != "" { + deviceNumber, err := uuidToDeviceNumber(uuid) + if err == nil { + mnt, ok := deviceNumberToMount(deviceNumber) + if mnt != nil { + log.Printf("resolved filesystem link using UUID %q", uuid) + return mnt, nil + } + if ok { + return nil, &ErrFollowLink{link, filesystemLacksMainMountError(deviceNumber)} + } + log.Printf("cannot find filesystem with UUID %q", uuid) + } else { + log.Printf("cannot find filesystem with UUID %q: %v", uuid, err) + } + errMsg += fmt.Sprintf("cannot find filesystem with UUID %q", uuid) + if path != "" { + log.Printf("falling back to using mountpoint path instead of UUID") + } } - if mnt.Device == "" { - return "", errors.Wrapf(ErrMakeLink, "no device for mount %q", mnt.Path) + // UUID didn't work. As a fallback, try the mountpoint path. + if path != "" { + mnt, err := GetMount(path) + if mnt != nil { + log.Printf("resolved filesystem link using mountpoint path %q", path) + return mnt, nil + } + log.Print(err) + if errMsg == "" { + errMsg = fmt.Sprintf("cannot find filesystem with main mountpoint %q", path) + } else { + errMsg += fmt.Sprintf(" or main mountpoint %q", path) + } } + // No method worked; return an error. + return nil, &ErrFollowLink{link, errors.New(errMsg)} +} - dirContents, err := ioutil.ReadDir(uuidDirectory) +func (mnt *Mount) getFilesystemUUID() (string, error) { + dirEntries, err := os.ReadDir(uuidDirectory) if err != nil { - return "", errors.Wrap(ErrMakeLink, err.Error()) + return "", err } - for _, fileInfo := range dirContents { + for _, dirEntry := range dirEntries { + fileInfo, err := dirEntry.Info() + if err != nil { + continue + } if fileInfo.Mode()&os.ModeSymlink == 0 { continue // Only interested in UUID symlinks } uuid := fileInfo.Name() - devicePath, err := cannonicalizePath(filepath.Join(uuidDirectory, uuid)) + deviceNumber, err := uuidToDeviceNumber(uuid) if err != nil { log.Print(err) continue } - if mnt.Device == devicePath { - return fmt.Sprintf("%s=%s", uuidToken, uuid), nil + if mnt.DeviceNumber == deviceNumber { + return uuid, nil } } - return "", errors.Wrapf(ErrMakeLink, "device %q has no UUID", mnt.Device) + return "", errors.Errorf("cannot determine UUID of device %q (%v)", + mnt.Device, mnt.DeviceNumber) +} + +// makeLink creates the contents of a link file which will point to the given +// filesystem. This will normally be a string of the form +// "UUID=<uuid>\nPATH=<path>\n". If the UUID cannot be determined, the UUID +// portion will be omitted. +func makeLink(mnt *Mount) (string, error) { + uuid, err := mnt.getFilesystemUUID() + if err != nil { + // The UUID could not be determined. This happens for btrfs + // filesystems, as the device number found via + // /dev/disk/by-uuid/* for btrfs filesystems differs from the + // actual device number of the mounted filesystem. Just rely + // entirely on the fallback to mountpoint path. + log.Print(err) + return fmt.Sprintf("%s=%s\n", pathToken, mnt.Path), nil + } + return fmt.Sprintf("%s=%s\n%s=%s\n", uuidToken, uuid, pathToken, mnt.Path), nil } |